Diffstat (limited to 'contrib/llvm/lib/Target')
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.h8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.td242
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp29
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp282
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp130
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp61
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td18
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp162
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp69
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp43
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp904
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h11
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp48
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp714
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h25
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td398
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp1147
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h51
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td505
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp222
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp64
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp72
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h23
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp375
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td19
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp108
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp118
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h18
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td6
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td120
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td200
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td1004
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td157
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td423
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Schedule.td11
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp641
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp6
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp81
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h113
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td182
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp81
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp3
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp324
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp53
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp40
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h56
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp110
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp6
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp31
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp28
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h1
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp15
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h69
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp157
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h14
-rw-r--r--contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td2
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h27
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.h47
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.td134
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp71
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h31
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp30
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp24
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp106
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp458
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp9
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp24
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp63
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def78
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp601
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h109
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp360
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp323
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp23
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td101
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp135
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp16
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp20
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp55
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp247
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp34
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h196
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp95
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp58
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp182
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td356
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/DSInstructions.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td66
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp446
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp31
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp522
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h37
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp70
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Instructions.td8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp9
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp181
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp101
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIDefines.h5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp385
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp231
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp164
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp19
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp2272
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h43
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp99
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp1504
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1112
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h116
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td161
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstructions.td159
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td32
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp1062
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp29
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp830
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp20
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp25
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp406
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp133
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp97
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp80
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp361
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SMInstructions.td177
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td96
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp352
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h204
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp75
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h24
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h1
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td47
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td279
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td173
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td124
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td117
-rw-r--r--contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp11
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td93
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp48
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp116
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp84
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallLowering.h7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp1022
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp33
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp10
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp32
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp66
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp672
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td31
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td172
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td145
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td20
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td89
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td66
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp94
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp72
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp37
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMacroFusion.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp414
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp17
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h6
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp136
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp15
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h22
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp1
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp18
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp20
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp36
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp65
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp8
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp13
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRInstrInfo.td59
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td20
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp10
-rw-r--r--contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp8
-rw-r--r--contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPF.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp14
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp96
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp14
-rw-r--r--contrib/llvm/lib/Target/BPF/BTF.def33
-rw-r--r--contrib/llvm/lib/Target/BPF/BTF.h209
-rw-r--r--contrib/llvm/lib/Target/BPF/BTFDebug.cpp759
-rw-r--r--contrib/llvm/lib/Target/BPF/BTFDebug.h285
-rw-r--r--contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp18
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp74
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.h27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.td42
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp8
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp88
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h5
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h79
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td2974
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td4736
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td907
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td5240
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td3337
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h168
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp93
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp104
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp14
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp20
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp146
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp350
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp73
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td88
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td (renamed from contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td)34
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp238
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h24
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td1539
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td305
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td308
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp8
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp37
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td688
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td64
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td11
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV5.td (renamed from contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td)22
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td41
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h17
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp28
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp12
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp86
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h85
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp88
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h44
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp58
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp16
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp6
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp4
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp36
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h12
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp3
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp9
-rw-r--r--contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp580
-rw-r--r--contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp387
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp36
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h7
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp178
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp59
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp81
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h53
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp27
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h27
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.td18
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp37
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp127
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h8
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td422
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp45
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h16
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td1483
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp3
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td29
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp208
-rw-r--r--contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp29
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp17
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp15
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp9
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h1
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp15
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td15
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td5
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td11
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td51
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp115
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp14
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp23
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h11
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td10
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td12
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td7
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp34
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp254
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCCState.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp335
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallLowering.h44
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCondMov.td10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp99
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td32
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp115
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp46
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td29
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp92
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td28
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp20
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp24
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp18
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp37
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h7
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.td5
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp59
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp35
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp78
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td66
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp19
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp122
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp21
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h13
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp10
-rw-r--r--contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp66
-rw-r--r--contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h49
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp130
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h81
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h38
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp43
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h41
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp76
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h60
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp102
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h52
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp22
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2.h35
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2.td59
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp153
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td34
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp27
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h39
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp76
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp188
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h63
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td235
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp54
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h49
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td109
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp117
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp14
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h62
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp55
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h52
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td60
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2Schedule.td39
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp56
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h97
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp119
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h45
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp18
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h26
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h30
-rw-r--r--contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp165
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp211
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp37
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp194
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h109
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h60
-rw-r--r--contrib/llvm/lib/Target/PowerPC/P9InstrResources.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td18
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp56
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp42
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp139
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp9
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp491
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp1041
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h39
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td174
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td21
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp559
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h82
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td380
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td50
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td316
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td (renamed from contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td)11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp95
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h19
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td12
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td12
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp24
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h4
-rw-r--r--contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp688
-rw-r--r--contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp38
-rw-r--r--contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp33
-rw-r--r--contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp130
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h113
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp83
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h10
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp16
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h8
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCV.h6
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCV.td6
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp556
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp3
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp126
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp328
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h24
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td9
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td13
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td271
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td189
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td39
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td35
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td37
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td31
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp2
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td352
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp18
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h (renamed from contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h)56
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp79
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h36
-rw-r--r--contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp55
-rw-r--r--contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp4
-rw-r--r--contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h2
-rwxr-xr-xcontrib/llvm/lib/Target/Sparc/LeonFeatures.td4
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp20
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h (renamed from contrib/llvm/lib/Target/Sparc/SparcTargetStreamer.h)5
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.td6
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp378
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h20
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td5
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td79
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp66
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp7
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp37
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp30
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp203
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td23
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td19
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td26
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperators.td86
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td7
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td12
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td80
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td85
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td66
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td64
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp15
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp489
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h16
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp3
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp16
-rw-r--r--contrib/llvm/lib/Target/TargetMachineC.cpp9
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp778
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp104
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp243
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h14
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp14
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h8
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp78
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp31
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h95
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp143
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h64
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp21
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/README.txt18
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.h6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.td12
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp87
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp112
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h9
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp238
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp693
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp43
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp46
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h38
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp87
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp116
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp218
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp161
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp442
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp94
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h29
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp611
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h25
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td448
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td71
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td104
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td116
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td54
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td131
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp17
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td121
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td46
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td67
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td794
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp79
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp123
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp287
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp84
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp164
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h26
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp (renamed from contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp)71
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp10
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp10
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp19
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp157
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp23
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h17
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp29
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp7
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h20
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp43
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp119
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp408
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp53
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp12
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h13
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp443
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h2
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h1
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp6
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp55
-rw-r--r--contrib/llvm/lib/Target/X86/ShadowCallStack.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp45
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h13
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h28
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td208
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp55
-rw-r--r--contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp33
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallLowering.cpp41
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallLowering.h4
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86CmovConversion.cpp6
-rw-r--r--contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp585
-rw-r--r--contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp156
-rw-r--r--contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp9
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp40
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp113
-rw-r--r--contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp47
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp1314
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp8589
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h117
-rw-r--r--contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp253
-rw-r--r--contrib/llvm/lib/Target/X86/X86Instr3DNow.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrAVX512.td2047
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td214
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td2
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td642
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td14
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td20
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td38
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td3
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td163
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp535
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h29
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td222
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td10
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td1304
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td58
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td228
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrXOP.td156
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp337
-rw-r--r--contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h477
-rw-r--r--contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp40
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp192
-rw-r--r--contrib/llvm/lib/Target/X86/X86MacroFusion.cpp24
-rw-r--r--contrib/llvm/lib/Target/X86/X86MacroFusion.h5
-rw-r--r--contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp8
-rw-r--r--contrib/llvm/lib/Target/X86/X86PfmCounters.td275
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp27
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td23
-rw-r--r--contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp27
-rwxr-xr-xcontrib/llvm/lib/Target/X86/X86SchedBroadwell.td433
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedHaswell.td457
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedPredicates.td24
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td324
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td468
-rwxr-xr-xcontrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td498
-rw-r--r--contrib/llvm/lib/Target/X86/X86Schedule.td421
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleAtom.td85
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td1282
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td246
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleSLM.td36
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td144
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp58
-rw-r--r--contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h13
-rw-r--r--contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp481
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp30
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h51
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp52
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h4
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp876
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h12
-rw-r--r--contrib/llvm/lib/Target/X86/X86WinEHState.cpp6
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp4
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp5
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp12
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp5
785 files changed, 71214 insertions, 36467 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm/lib/Target/AArch64/AArch64.h
index edda13ce97ef..c36d9354f3ba 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.h
@@ -32,12 +32,14 @@ class MachineFunctionPass;
FunctionPass *createAArch64DeadRegisterDefinitions();
FunctionPass *createAArch64RedundantCopyEliminationPass();
FunctionPass *createAArch64CondBrTuning();
+FunctionPass *createAArch64CompressJumpTablesPass();
FunctionPass *createAArch64ConditionalCompares();
FunctionPass *createAArch64AdvSIMDScalar();
FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
+FunctionPass *createAArch64SpeculationHardeningPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
FunctionPass *createAArch64SIMDInstrOptPass();
ModulePass *createAArch64PromoteConstantPass();
@@ -46,6 +48,7 @@ FunctionPass *createAArch64A57FPLoadBalancing();
FunctionPass *createAArch64A53Fix835769();
FunctionPass *createFalkorHWPFFixPass();
FunctionPass *createFalkorMarkStridedAccessesPass();
+FunctionPass *createAArch64BranchTargetsPass();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
@@ -53,18 +56,23 @@ FunctionPass *createAArch64CollectLOHPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
+FunctionPass *createAArch64PreLegalizeCombiner();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
+void initializeAArch64BranchTargetsPass(PassRegistry&);
void initializeAArch64CollectLOHPass(PassRegistry&);
void initializeAArch64CondBrTuningPass(PassRegistry &);
+void initializeAArch64CompressJumpTablesPass(PassRegistry&);
void initializeAArch64ConditionalComparesPass(PassRegistry&);
void initializeAArch64ConditionOptimizerPass(PassRegistry&);
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry&);
+void initializeAArch64SpeculationHardeningPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
+void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64StorePairSuppressPass(PassRegistry&);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
index a69d38144c78..8f79140cba64 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -65,25 +65,56 @@ def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
"Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
+def FeaturePAN : SubtargetFeature<
+ "pan", "HasPAN", "true",
+ "Enables ARM v8.1 Privileged Access-Never extension">;
+
+def FeatureLOR : SubtargetFeature<
+ "lor", "HasLOR", "true",
+ "Enables ARM v8.1 Limited Ordering Regions extension">;
+
+def FeatureVH : SubtargetFeature<
+ "vh", "HasVH", "true",
+ "Enables ARM v8.1 Virtual Host extension">;
+
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable ARMv8 PMUv3 Performance Monitors extension">;
def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"Full FP16", [FeatureFPARMv8]>;
+def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
+ "Enable FP16 FML instructions", [FeatureFullFP16]>;
+
def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true",
"Enable Statistical Profiling extension">;
+def FeaturePAN_RWV : SubtargetFeature<
+ "pan-rwv", "HasPAN_RWV", "true",
+ "Enable v8.2 PAN s1e1R and s1e1W Variants",
+ [FeaturePAN]>;
+
+// UAO PState
+def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true",
+ "Enable v8.2 UAO PState">;
+
+def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
+ "true", "Enable v8.2 data Cache Clean to Point of Persistence" >;
+
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions">;
-/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
+def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
+ "Has zero-cycle zeroing instructions for generic registers">;
+
+def FeatureZCZeroingFP : SubtargetFeature<"zcz-fp", "HasZeroCycleZeroingFP", "true",
+ "Has zero-cycle zeroing instructions for FP registers">;
-/// Cyclone has instructions which zero registers for "free".
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
- "Has zero-cycle zeroing instructions">;
+ "Has zero-cycle zeroing instructions",
+ [FeatureZCZeroingGP, FeatureZCZeroingFP]>;
/// ... but the floating-point version doesn't quite work in rare cases on older
/// CPUs.
@@ -96,13 +127,14 @@ def FeatureStrictAlign : SubtargetFeature<"strict-align",
"Disallow all unaligned memory "
"access">;
-def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true",
- "Reserve X18, making it unavailable "
- "as a GPR">;
+foreach i = {1-7,18,20} in
+ def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true",
+ "Reserve X"#i#", making it unavailable "
+ "as a GPR">;
-def FeatureReserveX20 : SubtargetFeature<"reserve-x20", "ReserveX20", "true",
- "Reserve X20, making it unavailable "
- "as a GPR">;
+foreach i = {8-15,18} in
+ def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i,
+ "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">;
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
@@ -117,11 +149,11 @@ def FeaturePredictableSelectIsExpensive : SubtargetFeature<
def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
"CustomAsCheapAsMove", "true",
- "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">;
+ "Use custom handling of cheap instructions">;
def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
"ExynosAsCheapAsMove", "true",
- "Use Exynos specific code in TargetInstrInfo::isAsCheapAsAMove()",
+ "Use Exynos specific handling of cheap instructions",
[FeatureCustomCheapAsMoveHandling]>;
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
@@ -156,10 +188,18 @@ def FeatureFuseAES : SubtargetFeature<
"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
+def FeatureFuseArithmeticLogic : SubtargetFeature<
+ "fuse-arith-logic", "HasFuseArithmeticLogic", "true",
+ "CPU fuses arithmetic and logic operations">;
+
def FeatureFuseCCSelect : SubtargetFeature<
"fuse-csel", "HasFuseCCSelect", "true",
"CPU fuses conditional select operations">;
+def FeatureFuseCryptoEOR : SubtargetFeature<
+ "fuse-crypto-eor", "HasFuseCryptoEOR", "true",
+ "CPU fuses AES/PMULL and EOR operations">;
+
def FeatureFuseLiterals : SubtargetFeature<
"fuse-literals", "HasFuseLiterals", "true",
"CPU fuses literal generation operations">;
@@ -168,6 +208,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
+def FeatureForce32BitJumpTables
+ : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
+                     "Force jump table entries to be 32 bits wide except at MinSize">;
+
def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true",
"Enable support for RCPC extension">;
@@ -179,6 +223,66 @@ def FeatureDotProd : SubtargetFeature<
"dotprod", "HasDotProd", "true",
"Enable dot product support">;
+def FeaturePA : SubtargetFeature<
+ "pa", "HasPA", "true",
+ "Enable v8.3-A Pointer Authentication enchancement">;
+
+def FeatureJS : SubtargetFeature<
+ "jsconv", "HasJS", "true",
+ "Enable v8.3-A JavaScript FP conversion enchancement",
+ [FeatureFPARMv8]>;
+
+def FeatureCCIDX : SubtargetFeature<
+ "ccidx", "HasCCIDX", "true",
+ "Enable v8.3-A Extend of the CCSIDR number of sets">;
+
+def FeatureComplxNum : SubtargetFeature<
+ "complxnum", "HasComplxNum", "true",
+ "Enable v8.3-A Floating-point complex number support",
+ [FeatureNEON]>;
+
+def FeatureNV : SubtargetFeature<
+ "nv", "HasNV", "true",
+ "Enable v8.4-A Nested Virtualization Enchancement">;
+
+def FeatureRASv8_4 : SubtargetFeature<
+ "rasv8_4", "HasRASv8_4", "true",
+ "Enable v8.4-A Reliability, Availability and Serviceability extension",
+ [FeatureRAS]>;
+
+def FeatureMPAM : SubtargetFeature<
+ "mpam", "HasMPAM", "true",
+ "Enable v8.4-A Memory system Partitioning and Monitoring extension">;
+
+def FeatureDIT : SubtargetFeature<
+ "dit", "HasDIT", "true",
+ "Enable v8.4-A Data Independent Timing instructions">;
+
+def FeatureTRACEV8_4 : SubtargetFeature<
+ "tracev8.4", "HasTRACEV8_4", "true",
+ "Enable v8.4-A Trace extension">;
+
+def FeatureAM : SubtargetFeature<
+ "am", "HasAM", "true",
+ "Enable v8.4-A Activity Monitors extension">;
+
+def FeatureSEL2 : SubtargetFeature<
+ "sel2", "HasSEL2", "true",
+ "Enable v8.4-A Secure Exception Level 2 extension">;
+
+def FeatureTLB_RMI : SubtargetFeature<
+ "tlb-rmi", "HasTLB_RMI", "true",
+ "Enable v8.4-A TLB Range and Maintenance Instructions">;
+
+def FeatureFMI : SubtargetFeature<
+ "fmi", "HasFMI", "true",
+ "Enable v8.4-A Flag Manipulation Instructions">;
+
+// 8.4 RCPC enhancements: LDAPR & STLR instructions with Immediate Offset
+def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true",
+ "Enable v8.4-A RCPC instructions with Immediate Offsets",
+ [FeatureRCPC]>;
+
def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
"NegativeImmediates", "false",
"Convert immediates and instructions "
@@ -196,21 +300,65 @@ def FeatureAggressiveFMA :
"true",
"Enable Aggressive FMA for floating-point.">;
+def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true",
+ "Enable alternative NZCV format for floating point comparisons">;
+
+def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true",
+ "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to "
+ "an integer (in FP format) forcing it to fit into a 32- or 64-bit int" >;
+
+def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict",
+ "true", "Enable architectural speculation restriction" >;
+
+def FeatureSB : SubtargetFeature<"sb", "HasSB",
+ "true", "Enable v8.5 Speculation Barrier" >;
+
+def FeatureSSBS : SubtargetFeature<"ssbs", "HasSSBS",
+ "true", "Enable Speculative Store Bypass Safe bit" >;
+
+def FeaturePredRes : SubtargetFeature<"predres", "HasPredRes", "true",
+ "Enable v8.5a execution and data prediction invalidation instructions" >;
+
+def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP",
+ "true", "Enable v8.5 Cache Clean to Point of Deep Persistence" >;
+
+def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI",
+ "true", "Enable Branch Target Identification" >;
+
+def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen",
+ "true", "Enable Random Number generation instructions" >;
+
+def FeatureMTE : SubtargetFeature<"mte", "HasMTE",
+ "true", "Enable Memory Tagging Extension" >;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
- "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>;
+ "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM,
+ FeaturePAN, FeatureLOR, FeatureVH]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
- "Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>;
+ "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO,
+ FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>;
def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
- "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC]>;
+ "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePA,
+ FeatureJS, FeatureCCIDX, FeatureComplxNum]>;
def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
- "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd]>;
+ "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd,
+ FeatureNV, FeatureRASv8_4, FeatureMPAM, FeatureDIT,
+ FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI,
+ FeatureFMI, FeatureRCPC_IMMO]>;
+
+def HasV8_5aOps : SubtargetFeature<
+ "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
+ [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict,
+ FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
+ FeatureBranchTargetId]
+>;
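+
+// Illustrative note (assumption, not part of this patch): front ends normally
+// reach these bundles through an architecture string, e.g.
+//   clang -target aarch64 -march=armv8.5-a ...
+// which enables HasV8_5aOps and, transitively, every feature it lists above.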
//===----------------------------------------------------------------------===//
// Register File Description
@@ -226,6 +374,8 @@ include "AArch64CallingConvention.td"
include "AArch64Schedule.td"
include "AArch64InstrInfo.td"
+include "AArch64SchedPredicates.td"
+include "AArch64SchedPredExynos.td"
def AArch64InstrInfo : InstrInfo;
@@ -245,6 +395,7 @@ include "AArch64SchedFalkor.td"
include "AArch64SchedKryo.td"
include "AArch64SchedExynosM1.td"
include "AArch64SchedExynosM3.td"
+include "AArch64SchedExynosM4.td"
include "AArch64SchedThunderX.td"
include "AArch64SchedThunderX2T99.td"
@@ -343,6 +494,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeatureDisableLatencySchedHeuristic,
FeatureFPARMv8,
FeatureFuseAES,
+ FeatureFuseCryptoEOR,
FeatureNEON,
FeaturePerfMon,
FeatureZCRegMove,
@@ -356,14 +508,13 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
FeatureCRC,
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
- FeatureFPARMv8,
+ FeatureForce32BitJumpTables,
FeatureFuseAES,
- FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeatureSlowMisaligned128Store,
FeatureUseRSqrt,
- FeatureZCZeroing]>;
+ FeatureZCZeroingFP]>;
def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M2 processors",
@@ -371,29 +522,47 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
FeatureCRC,
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
- FeatureFPARMv8,
+ FeatureForce32BitJumpTables,
FeatureFuseAES,
- FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeatureSlowMisaligned128Store,
- FeatureZCZeroing]>;
+ FeatureZCZeroingFP]>;
def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
[FeatureCRC,
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
- FeatureFPARMv8,
+ FeatureForce32BitJumpTables,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseCCSelect,
FeatureFuseLiterals,
FeatureLSLFast,
- FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
+ FeatureZCZeroingFP]>;
+
+def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
+ "Samsung Exynos-M4 processors",
+ [HasV8_2aOps,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureCrypto,
+ FeatureDotProd,
+ FeatureExynosCheapAsMoveHandling,
+ FeatureForce32BitJumpTables,
+ FeatureFP16FML,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseLiterals,
+ FeatureLSLFast,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
FeatureZCZeroing]>;
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
@@ -438,7 +607,7 @@ def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
FeatureLSLFast,
- HasV8_3aOps]>;
+ HasV8_4aOps]>;
def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
"ThunderX2T99",
@@ -497,6 +666,21 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
FeaturePredictableSelectIsExpensive,
FeatureNEON]>;
+def ProcTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
+ "HiSilicon TS-V110 processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureSPE,
+ FeatureFullFP16,
+ FeatureFP16FML,
+ FeatureDotProd]>;
+
def : ProcessorModel<"generic", NoSchedModel, [
FeatureFPARMv8,
FeatureFuseAES,
@@ -518,7 +702,7 @@ def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
-def : ProcessorModel<"exynos-m4", ExynosM3Model, [ProcExynosM3]>;
+def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
@@ -529,6 +713,8 @@ def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
+// FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57.
+def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>;
//===----------------------------------------------------------------------===//
// Assembly parser
@@ -577,3 +763,9 @@ def AArch64 : Target {
let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter];
let AllowRegisterRenaming = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Pfm Counters
+//===----------------------------------------------------------------------===//
+
+include "AArch64PfmCounters.td"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index a95476b91187..452fbd3488b0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -377,11 +377,10 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
// Now we have a set of sets, order them by start address so
// we can iterate over them sequentially.
- llvm::sort(V.begin(), V.end(),
- [](const std::vector<Chain*> &A,
- const std::vector<Chain*> &B) {
- return A.front()->startsBefore(B.front());
- });
+ llvm::sort(V,
+ [](const std::vector<Chain *> &A, const std::vector<Chain *> &B) {
+ return A.front()->startsBefore(B.front());
+ });
// As we only have two colors, we can track the global (BB-level) balance of
// odds versus evens. We aim to keep this near zero to keep both execution
@@ -453,16 +452,16 @@ bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
// change them to!
// Final tie-break with instruction order so pass output is stable (i.e. not
// dependent on malloc'd pointer values).
- llvm::sort(GV.begin(), GV.end(), [](const Chain *G1, const Chain *G2) {
- if (G1->size() != G2->size())
- return G1->size() > G2->size();
- if (G1->requiresFixup() != G2->requiresFixup())
- return G1->requiresFixup() > G2->requiresFixup();
- // Make sure startsBefore() produces a stable final order.
- assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
- "Starts before not total order!");
- return G1->startsBefore(G2);
- });
+ llvm::sort(GV, [](const Chain *G1, const Chain *G2) {
+ if (G1->size() != G2->size())
+ return G1->size() > G2->size();
+ if (G1->requiresFixup() != G2->requiresFixup())
+ return G1->requiresFixup() > G2->requiresFixup();
+ // Make sure startsBefore() produces a stable final order.
+ assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
+ "Starts before not total order!");
+ return G1->startsBefore(G2);
+ });
Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 52819dedc23d..0442076992e2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -21,16 +21,20 @@
#include "InstPrinter/AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -77,6 +81,12 @@ public:
return MCInstLowering.lowerOperand(MO, MCOp);
}
+ void EmitJumpTableInfo() override;
+ void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB, unsigned JTI);
+
+ void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
+
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -100,12 +110,33 @@ public:
AU.setPreservesAll();
}
- bool runOnMachineFunction(MachineFunction &F) override {
- AArch64FI = F.getInfo<AArch64FunctionInfo>();
- STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
- bool Result = AsmPrinter::runOnMachineFunction(F);
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ AArch64FI = MF.getInfo<AArch64FunctionInfo>();
+ STI = static_cast<const AArch64Subtarget*>(&MF.getSubtarget());
+
+ SetupMachineFunction(MF);
+
+ if (STI->isTargetCOFF()) {
+ bool Internal = MF.getFunction().hasInternalLinkage();
+ COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ int Type =
+ COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
+
+ OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer->EmitCOFFSymbolStorageClass(Scl);
+ OutStreamer->EmitCOFFSymbolType(Type);
+ OutStreamer->EndCOFFSymbolDef();
+ }
+
+ // Emit the rest of the function body.
+ EmitFunctionBody();
+
+ // Emit the XRay table for this function.
emitXRayTable();
- return Result;
+
+ // We didn't modify anything.
+ return false;
}
private:
@@ -208,7 +239,7 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
}
}
@@ -433,6 +464,104 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
printOperand(MI, NOps - 2, OS);
}
+void AArch64AsmPrinter::EmitJumpTableInfo() {
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (!MJTI) return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+ MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
+ OutStreamer->SwitchSection(ReadOnlySec);
+
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ unsigned Size = AFI->getJumpTableEntrySize(JTI);
+ EmitAlignment(Log2_32(Size));
+ OutStreamer->EmitLabel(GetJTISymbol(JTI));
+
+ for (auto *JTBB : JTBBs)
+ emitJumpTableEntry(MJTI, JTBB, JTI);
+ }
+}
+
+void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned JTI) {
+ const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ unsigned Size = AFI->getJumpTableEntrySize(JTI);
+
+ if (Size == 4) {
+ // .word LBB - LJTI
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ } else {
+ // .byte (LBB - LBB) >> 2 (or .hword)
+ const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI);
+ const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ Value = MCBinaryExpr::createLShr(
+ Value, MCConstantExpr::create(2, OutContext), OutContext);
+ }
+
+ OutStreamer->EmitValue(Value, Size);
+}
+
+/// Small jump tables contain an unsigned byte or half, representing the offset
+/// from the lowest-addressed possible destination to the desired basic
+/// block. Since all instructions are 4-byte aligned, this is further compressed
+/// by counting in instructions rather than bytes (i.e. divided by 4). So, to
+/// materialize the correct destination we need:
+///
+/// adr xDest, .LBB0_0
+/// ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh).
+/// add xDest, xDest, xScratch, lsl #2
+void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
+ const llvm::MachineInstr &MI) {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned ScratchRegW =
+ STI->getRegisterInfo()->getSubReg(ScratchReg, AArch64::sub_32);
+ unsigned TableReg = MI.getOperand(2).getReg();
+ unsigned EntryReg = MI.getOperand(3).getReg();
+ int JTIdx = MI.getOperand(4).getIndex();
+ bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
+
+  // This has to be first because the compression pass bases its reachability
+ // calculations on the start of the JumpTableDest instruction.
+ auto Label =
+ MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
+ .addReg(DestReg)
+ .addExpr(MCSymbolRefExpr::create(
+ Label, MF->getContext())));
+
+ // Load the number of instruction-steps to offset from the label.
+ unsigned LdrOpcode = IsByteEntry ? AArch64::LDRBBroX : AArch64::LDRHHroX;
+ EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
+ .addReg(ScratchRegW)
+ .addReg(TableReg)
+ .addReg(EntryReg)
+ .addImm(0)
+ .addImm(IsByteEntry ? 0 : 1));
+
+ // Multiply the steps by 4 and add to the already materialized base label
+ // address.
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+ .addReg(DestReg)
+ .addReg(DestReg)
+ .addReg(ScratchReg)
+ .addImm(2));
+}
+
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
@@ -503,7 +632,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
unsigned DestReg = MI.getOperand(0).getReg();
- if (STI->hasZeroCycleZeroing() && !STI->hasZeroCycleZeroingFPWorkaround()) {
+ if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
// Convert H/S/D register to corresponding Q register
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
@@ -559,6 +688,8 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer->EmitLabel(LOHLabel);
}
+ AArch64TargetStreamer *TS =
+ static_cast<AArch64TargetStreamer *>(OutStreamer->getTargetStreamer());
// Do any manual lowerings.
switch (MI->getOpcode()) {
default:
@@ -585,12 +716,27 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer->EmitRawText(StringRef(OS.str()));
}
return;
+
+ case AArch64::EMITBKEY: {
+ ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
+ if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
+ ExceptionHandlingType != ExceptionHandling::ARM)
+ return;
+
+ if (needsCFIMoves() == CFI_M_None)
+ return;
+
+ OutStreamer->EmitCFIBKeyFrame();
+ return;
+ }
}
// Tail calls use pseudo instructions so they have the proper code-gen
// attributes (isCall, isReturn, etc.). We lower them to the real
// instruction here.
- case AArch64::TCRETURNri: {
+ case AArch64::TCRETURNri:
+ case AArch64::TCRETURNriBTI:
+ case AArch64::TCRETURNriALL: {
MCInst TmpInst;
TmpInst.setOpcode(AArch64::BR);
TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
@@ -660,6 +806,32 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case AArch64::JumpTableDest32: {
+ // We want:
+ // ldrsw xScratch, [xTable, xEntry, lsl #2]
+ // add xDest, xTable, xScratch
+ unsigned DestReg = MI->getOperand(0).getReg(),
+ ScratchReg = MI->getOperand(1).getReg(),
+ TableReg = MI->getOperand(2).getReg(),
+ EntryReg = MI->getOperand(3).getReg();
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
+ .addReg(ScratchReg)
+ .addReg(TableReg)
+ .addReg(EntryReg)
+ .addImm(0)
+ .addImm(1));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+ .addReg(DestReg)
+ .addReg(TableReg)
+ .addReg(ScratchReg)
+ .addImm(0));
+ return;
+ }
+ case AArch64::JumpTableDest16:
+ case AArch64::JumpTableDest8:
+ LowerJumpTableDestSmall(*OutStreamer, *MI);
+ return;
+
case AArch64::FMOVH0:
case AArch64::FMOVS0:
case AArch64::FMOVD0:
@@ -683,6 +855,100 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::PATCHABLE_TAIL_CALL:
LowerPATCHABLE_TAIL_CALL(*MI);
return;
+
+ case AArch64::SEH_StackAlloc:
+ TS->EmitARM64WinCFIAllocStack(MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_SaveFPLR:
+ TS->EmitARM64WinCFISaveFPLR(MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_SaveFPLR_X:
+ assert(MI->getOperand(0).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveFPLRX(-MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_SaveReg:
+ TS->EmitARM64WinCFISaveReg(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveReg_X:
+ assert(MI->getOperand(1).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveRegX(MI->getOperand(0).getImm(),
+ -MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveRegP:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+ "Non-consecutive registers not allowed for save_regp");
+ TS->EmitARM64WinCFISaveRegP(MI->getOperand(0).getImm(),
+ MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SaveRegP_X:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+ "Non-consecutive registers not allowed for save_regp_x");
+ assert(MI->getOperand(2).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveRegPX(MI->getOperand(0).getImm(),
+ -MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SaveFReg:
+ TS->EmitARM64WinCFISaveFReg(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveFReg_X:
+ assert(MI->getOperand(1).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveFRegX(MI->getOperand(0).getImm(),
+ -MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveFRegP:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+           "Non-consecutive registers not allowed for save_fregp");
+ TS->EmitARM64WinCFISaveFRegP(MI->getOperand(0).getImm(),
+ MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SaveFRegP_X:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+           "Non-consecutive registers not allowed for save_fregp_x");
+ assert(MI->getOperand(2).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveFRegPX(MI->getOperand(0).getImm(),
+ -MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SetFP:
+ TS->EmitARM64WinCFISetFP();
+ return;
+
+ case AArch64::SEH_AddFP:
+ TS->EmitARM64WinCFIAddFP(MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_Nop:
+ TS->EmitARM64WinCFINop();
+ return;
+
+ case AArch64::SEH_PrologEnd:
+ TS->EmitARM64WinCFIPrologEnd();
+ return;
+
+ case AArch64::SEH_EpilogStart:
+ TS->EmitARM64WinCFIEpilogStart();
+ return;
+
+ case AArch64::SEH_EpilogEnd:
+ TS->EmitARM64WinCFIEpilogEnd();
+ return;
}
// Finally, do the automated lowerings for everything else.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
new file mode 100644
index 000000000000..da70a624c5be
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -0,0 +1,130 @@
+//===-- AArch64BranchTargets.cpp -- Harden code using v8.5-A BTI extension -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts BTI instructions at the start of every function and basic
+// block which could be indirectly called. The hardware will (when enabled)
+// trap when an indirect branch or call instruction targets an instruction
+// which is not a valid BTI instruction. This is intended to guard against
+// control-flow hijacking attacks. Note that this does not do anything for RET
+// instructions, as they can be more precisely protected by return address
+// signing.
+//
+//===----------------------------------------------------------------------===//
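+// Background note (illustrative, not part of the upstream comment): the BTI
+// variants live in the HINT encoding space, so they execute as NOPs on cores
+// without v8.5-A BTI. The mapping addBTI() below relies on is:
+//   HINT #32  =  BTI       (no target kind)
+//   HINT #34  =  BTI c     (valid indirect-call target)
+//   HINT #36  =  BTI j     (valid indirect-jump target)
+//   HINT #38  =  BTI jc    (valid target of either)
+//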
+
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-branch-targets"
+#define AARCH64_BRANCH_TARGETS_NAME "AArch64 Branch Targets"
+
+namespace {
+class AArch64BranchTargets : public MachineFunctionPass {
+public:
+ static char ID;
+ AArch64BranchTargets() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return AARCH64_BRANCH_TARGETS_NAME; }
+
+private:
+ void addBTI(MachineBasicBlock &MBB, bool CouldCall, bool CouldJump);
+};
+} // end anonymous namespace
+
+char AArch64BranchTargets::ID = 0;
+
+INITIALIZE_PASS(AArch64BranchTargets, "aarch64-branch-targets",
+ AARCH64_BRANCH_TARGETS_NAME, false, false)
+
+void AArch64BranchTargets::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+FunctionPass *llvm::createAArch64BranchTargetsPass() {
+ return new AArch64BranchTargets();
+}
+
+bool AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("branch-target-enforcement"))
+ return false;
+
+ LLVM_DEBUG(
+ dbgs() << "********** AArch64 Branch Targets **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ // LLVM does not consider basic blocks which are the targets of jump tables
+ // to be address-taken (the address can't escape anywhere else), but they are
+  // used for indirect branches, so they need BTI instructions.
+ SmallPtrSet<MachineBasicBlock *, 8> JumpTableTargets;
+ if (auto *JTI = MF.getJumpTableInfo())
+ for (auto &JTE : JTI->getJumpTables())
+ for (auto *MBB : JTE.MBBs)
+ JumpTableTargets.insert(MBB);
+
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ bool CouldCall = false, CouldJump = false;
+ // If the function is address-taken or externally-visible, it could be
+    // indirectly called. PLT entries and tail-calls use BR, but when they are
+    // in guarded pages they should all use x16 or x17 to hold the called
+ // address, so we don't need to set CouldJump here. BR instructions in
+ // non-guarded pages (which might be non-BTI-aware code) are allowed to
+ // branch to a "BTI c" using any register.
+ if (&MBB == &*MF.begin() && (F.hasAddressTaken() || !F.hasLocalLinkage()))
+ CouldCall = true;
+
+ // If the block itself is address-taken, it could be indirectly branched
+ // to, but not called.
+ if (MBB.hasAddressTaken() || JumpTableTargets.count(&MBB))
+ CouldJump = true;
+
+ if (CouldCall || CouldJump) {
+ addBTI(MBB, CouldCall, CouldJump);
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall,
+ bool CouldJump) {
+ LLVM_DEBUG(dbgs() << "Adding BTI " << (CouldJump ? "j" : "")
+ << (CouldCall ? "c" : "") << " to " << MBB.getName()
+ << "\n");
+
+ const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
+
+ unsigned HintNum = 32;
+ if (CouldCall)
+ HintNum |= 2;
+ if (CouldJump)
+ HintNum |= 4;
+ assert(HintNum != 32 && "No target kinds!");
+
+ auto MBBI = MBB.begin();
+
+ // PACI[AB]SP are implicitly BTI JC, so no BTI instruction needed there.
+ if (MBBI != MBB.end() && (MBBI->getOpcode() == AArch64::PACIASP ||
+ MBBI->getOpcode() == AArch64::PACIBSP))
+ return;
+
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AArch64::HINT))
+ .addImm(HintNum);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index 26d532555e78..5980e5684e89 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -227,32 +227,45 @@ void AArch64CallLowering::splitToValueTypes(
}
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
- MachineFunction &MF = MIRBuilder.getMF();
- const Function &F = MF.getFunction();
-
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
- assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
- bool Success = true;
- if (VReg) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
+ "Return value without a vreg");
- // We zero-extend i1s to i8.
- if (MRI.getType(VReg).getSizeInBits() == 1)
- VReg = MIRBuilder.buildZExt(LLT::scalar(8), VReg)->getOperand(0).getReg();
+ bool Success = true;
+ if (!VRegs.empty()) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
auto &DL = F.getParent()->getDataLayout();
+ LLVMContext &Ctx = Val->getType()->getContext();
- ArgInfo OrigArg{VReg, Val->getType()};
- setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> SplitArgs;
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
- [&](unsigned Reg, uint64_t Offset) {
- MIRBuilder.buildExtract(Reg, VReg, Offset);
- });
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ // We zero-extend i1s to i8.
+ unsigned CurVReg = VRegs[i];
+ if (MRI.getType(VRegs[i]).getSizeInBits() == 1) {
+ CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg)
+ ->getOperand(0)
+ .getReg();
+ }
+
+ ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+ splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, F.getCallingConv(),
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, CurVReg, Offset);
+ });
+ }
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
@@ -324,6 +337,10 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
}
+ auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ if (Subtarget.hasCustomCallingConv())
+ Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
+
// Move back to the end of the basic block.
MIRBuilder.setMBB(MBB);
@@ -364,8 +381,14 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIB.add(Callee);
// Tell the call which registers are clobbered.
- auto TRI = MF.getSubtarget().getRegisterInfo();
- MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
+ auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
+ if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(MF, &Mask);
+ MIB.addRegMask(Mask);
+
+ if (TRI->isAnyArgRegReserved(MF))
+ TRI->emitReservedArgRegCallError(MF);
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
index 68c127fc42e5..1c2bd6a4de5d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -34,8 +34,8 @@ class AArch64CallLowering: public CallLowering {
public:
AArch64CallLowering(const AArch64TargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 30492003df14..5db941e9dac7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -123,7 +123,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
// Vararg functions on windows pass floats in integer registers
def CC_AArch64_Win64_VarArg : CallingConv<[
- CCIfType<[f16, f32], CCPromoteToType<f64>>,
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
CCIfType<[f64], CCBitConvertToType<i64>>,
CCDelegateTo<CC_AArch64_AAPCS>
]>;
@@ -288,6 +288,20 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
+// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
+// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
+// and not (LR,FP) pairs.
+def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22,
+ X23, X24, X25, X26, X27, X28,
+ D8, D9, D10, D11,
+ D12, D13, D14, D15)>;
+
+// AArch64 PCS for vector functions (VPCS)
+// must (additionally) preserve full Q8-Q23 registers
+def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+ X23, X24, X25, X26, X27, X28,
+ (sequence "Q%u", 8, 23))>;
+
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
// this can be partially modelled by treating X0 as a callee-saved register;
@@ -362,5 +376,7 @@ def CSR_AArch64_AAPCS_SwiftError_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
def CSR_AArch64_RT_MostRegs_SCS
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
+def CSR_AArch64_AAVPCS_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
def CSR_AArch64_AAPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
new file mode 100644
index 000000000000..0924a27e2586
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -0,0 +1,162 @@
+//==-- AArch64CompressJumpTables.cpp - Compress jump tables for AArch64 --====//
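+
+// Illustrative usage (assumption, not defined here): these generated features
+// are normally selected from the command line, e.g.
+//   llc -mtriple=aarch64 -mattr=+reserve-x18,+call-saved-x8 ...
+// reserves X18 so it is never allocated as a GPR and treats X8 as callee
+// saved in every function.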
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This pass looks at the basic blocks each jump-table refers to and works out
+// whether they can be emitted in a compressed form (with 8 or 16-bit
+// entries). If so, it changes the opcode and flags them in the associated
+// AArch64FunctionInfo.
+//
+//===----------------------------------------------------------------------===//
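+//
+// Rough sizing rule (informal restatement of compressJumpTable() below, not a
+// normative statement): entries count 4-byte instructions from the lowest
+// addressed target block, so a table can use 1-byte entries when
+// (MaxOffset - MinOffset) / 4 fits in 8 bits (about 1KB of code) and 2-byte
+// entries when it fits in 16 bits (about 256KB), provided the ADR that
+// materializes the base block stays within its +/-1MB range.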
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-jump-tables"
+
+STATISTIC(NumJT8, "Number of jump-tables with 1-byte entries");
+STATISTIC(NumJT16, "Number of jump-tables with 2-byte entries");
+STATISTIC(NumJT32, "Number of jump-tables with 4-byte entries");
+
+namespace {
+class AArch64CompressJumpTables : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineFunction *MF;
+ SmallVector<int, 8> BlockInfo;
+
+ int computeBlockSize(MachineBasicBlock &MBB);
+ void scanFunction();
+
+ bool compressJumpTable(MachineInstr &MI, int Offset);
+
+public:
+ static char ID;
+ AArch64CompressJumpTables() : MachineFunctionPass(ID) {
+ initializeAArch64CompressJumpTablesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+ StringRef getPassName() const override {
+ return "AArch64 Compress Jump Tables";
+ }
+};
+char AArch64CompressJumpTables::ID = 0;
+}
+
+INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE,
+ "AArch64 compress jump tables pass", false, false)
+
+int AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+ int Size = 0;
+ for (const MachineInstr &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+}
+
+void AArch64CompressJumpTables::scanFunction() {
+ BlockInfo.clear();
+ BlockInfo.resize(MF->getNumBlockIDs());
+
+ int Offset = 0;
+ for (MachineBasicBlock &MBB : *MF) {
+ BlockInfo[MBB.getNumber()] = Offset;
+ Offset += computeBlockSize(MBB);
+ }
+}
+
+bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
+ int Offset) {
+ if (MI.getOpcode() != AArch64::JumpTableDest32)
+ return false;
+
+ int JTIdx = MI.getOperand(4).getIndex();
+ auto &JTInfo = *MF->getJumpTableInfo();
+ const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx];
+
+ // The jump-table might have been optimized away.
+ if (JT.MBBs.empty())
+ return false;
+
+ int MaxOffset = std::numeric_limits<int>::min(),
+ MinOffset = std::numeric_limits<int>::max();
+ MachineBasicBlock *MinBlock = nullptr;
+ for (auto Block : JT.MBBs) {
+ int BlockOffset = BlockInfo[Block->getNumber()];
+ assert(BlockOffset % 4 == 0 && "misaligned basic block");
+
+ MaxOffset = std::max(MaxOffset, BlockOffset);
+ if (BlockOffset <= MinOffset) {
+ MinOffset = BlockOffset;
+ MinBlock = Block;
+ }
+ }
+
+ // The ADR instruction needed to calculate the address of the first reachable
+ // basic block can address +/-1MB.
+ if (!isInt<21>(MinOffset - Offset)) {
+ ++NumJT32;
+ return false;
+ }
+
+ int Span = MaxOffset - MinOffset;
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ if (isUInt<8>(Span / 4)) {
+ AFI->setJumpTableEntryInfo(JTIdx, 1, MinBlock->getSymbol());
+ MI.setDesc(TII->get(AArch64::JumpTableDest8));
+ ++NumJT8;
+ return true;
+ } else if (isUInt<16>(Span / 4)) {
+ AFI->setJumpTableEntryInfo(JTIdx, 2, MinBlock->getSymbol());
+ MI.setDesc(TII->get(AArch64::JumpTableDest16));
+ ++NumJT16;
+ return true;
+ }
+
+ ++NumJT32;
+ return false;
+}
+
+bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
+ bool Changed = false;
+ MF = &MFIn;
+
+ const auto &ST = MF->getSubtarget<AArch64Subtarget>();
+ TII = ST.getInstrInfo();
+
+ if (ST.force32BitJumpTables() && !MF->getFunction().optForMinSize())
+ return false;
+
+ scanFunction();
+
+ for (MachineBasicBlock &MBB : *MF) {
+ int Offset = BlockInfo[MBB.getNumber()];
+ for (MachineInstr &MI : MBB) {
+ Changed |= compressJumpTable(MI, Offset);
+ Offset += TII->getInstSizeInBytes(MI);
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64CompressJumpTablesPass() {
+ return new AArch64CompressJumpTables();
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 9226a9dd879b..f7190d58fbf9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -835,36 +835,55 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
}
case AArch64::LOADgot: {
- // Expand into ADRP + LDR.
+ MachineFunction *MF = MBB.getParent();
unsigned DstReg = MI.getOperand(0).getReg();
const MachineOperand &MO1 = MI.getOperand(1);
unsigned Flags = MO1.getTargetFlags();
- MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
- MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
- .add(MI.getOperand(0))
- .addReg(DstReg);
-
- if (MO1.isGlobal()) {
- MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
- MIB2.addGlobalAddress(MO1.getGlobal(), 0,
- Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- } else if (MO1.isSymbol()) {
- MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
- MIB2.addExternalSymbol(MO1.getSymbolName(),
- Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+ if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
+      // Tiny code model: expand to a single LDR.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(AArch64::LDRXl), DstReg);
+
+ if (MO1.isGlobal()) {
+ MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
+ } else if (MO1.isSymbol()) {
+ MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
+ } else {
+ assert(MO1.isCPI() &&
+ "Only expect globals, externalsymbols, or constant pools");
+ MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
+ }
} else {
- assert(MO1.isCPI() &&
- "Only expect globals, externalsymbols, or constant pools");
- MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
- Flags | AArch64II::MO_PAGE);
- MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
- Flags | AArch64II::MO_PAGEOFF |
- AArch64II::MO_NC);
+      // Small code model: expand into ADRP + LDR.
+ MachineInstrBuilder MIB1 =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
+ MachineInstrBuilder MIB2 =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
+ .add(MI.getOperand(0))
+ .addReg(DstReg);
+
+ if (MO1.isGlobal()) {
+ MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
+ MIB2.addGlobalAddress(MO1.getGlobal(), 0,
+ Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ } else if (MO1.isSymbol()) {
+ MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
+ MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
+ AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ } else {
+ assert(MO1.isCPI() &&
+ "Only expect globals, externalsymbols, or constant pools");
+ MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
+ Flags | AArch64II::MO_PAGE);
+ MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
+ Flags | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ }
+
+ transferImpOps(MI, MIB1, MIB2);
}
-
- transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 572d1c22feea..47550cabb9f0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2016,8 +2016,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
if (RetVT == MVT::i64 && VT <= MVT::i32) {
if (WantZExt) {
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
- std::prev(FuncInfo.InsertPt)->eraseFromParent();
- ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
+ MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
+ ResultReg = std::prev(I)->getOperand(0).getReg();
+ removeDeadCode(I, std::next(I));
} else
ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
/*IsKill=*/true,
@@ -2038,7 +2039,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
break;
}
}
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
MI = nullptr;
if (Reg)
MI = MRI.getUniqueVRegDef(Reg);
@@ -2256,6 +2258,13 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
/// Try to emit a combined compare-and-branch instruction.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ if (FuncInfo.MF->getFunction().hasFnAttribute(
+ Attribute::SpeculativeLoadHardening))
+ return false;
+
assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
const CmpInst *CI = cast<CmpInst>(BI->getCondition());
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
@@ -2918,6 +2927,9 @@ bool AArch64FastISel::fastLowerArguments() {
if (CC != CallingConv::C && CC != CallingConv::Swift)
return false;
+ if (Subtarget->hasCustomCallingConv())
+ return false;
+
// Only handle simple cases of up to 8 GPR and FPR each.
unsigned GPRCnt = 0;
unsigned FPRCnt = 0;
@@ -3208,6 +3220,10 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!processCallArgs(CLI, OutVTs, NumBytes))
return false;
+ const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ if (RegInfo->isAnyArgRegReserved(*MF))
+ RegInfo->emitReservedArgRegCallError(*MF);
+
// Issue the call.
MachineInstrBuilder MIB;
if (Subtarget->useSmallAddressing()) {
@@ -3443,6 +3459,21 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, SrcReg);
return true;
}
+ case Intrinsic::sponentry: {
+ MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
+
+ // SP = FP + Fixed Object + 16
+ int FI = MFI.CreateFixedObject(4, 0, false);
+ unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::ADDXri), ResultReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0);
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::memcpy:
case Intrinsic::memmove: {
const auto *MTI = cast<MemTransferInst>(II);
@@ -3738,6 +3769,9 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
}
+ if (!ResultReg1)
+ return false;
+
ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
/*IsKill=*/true, getInvertedCondCode(CC));
@@ -4483,7 +4517,8 @@ bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
"Expected copy instruction");
Reg = MI->getOperand(1).getReg();
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
}
updateValueMap(I, Reg);
return true;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 6dc5d19862a9..538a8d7e8fbc 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -98,6 +98,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -114,11 +115,13 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -201,6 +204,11 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ // Win64 EH requires a frame pointer if funclets are present, as the locals
+ // are accessed off the frame pointer in both the parent function and the
+ // funclets.
+ if (MF.hasEHFunclets())
+ return true;
// Retain behavior of always omitting the FP for leaf functions when possible.
if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
return true;
@@ -279,6 +287,31 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(I);
}
+static bool ShouldSignReturnAddress(MachineFunction &MF) {
+ // The function should be signed in the following situations:
+ // - sign-return-address=all
+  //  - sign-return-address=non-leaf and the function spills the LR
+
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("sign-return-address"))
+ return false;
+
+ StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
+ if (Scope.equals("none"))
+ return false;
+
+ if (Scope.equals("all"))
+ return true;
+
+ assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");
+
+ for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo())
+ if (Info.getReg() == AArch64::LR)
+ return true;
+
+ return false;
+}
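+
+// For illustration (hypothetical IR, not taken from this patch): the attribute
+// tested above is attached per function in LLVM IR, e.g.
+//
+//   define void @f() #0 { ... }
+//   attributes #0 = { "sign-return-address"="non-leaf" }
+//
+// With "non-leaf", signing only happens when LR ends up in the callee-saved
+// list computed for the function.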
+
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
@@ -330,7 +363,7 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
LiveRegs.addLiveIns(*MBB);
// Mark callee saved registers as used so we will not choose them.
- const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
+ const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
@@ -408,54 +441,217 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
return true;
}
+// Given a load or a store instruction, generate an appropriate unwinding SEH
+// code on Windows.
+static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo &TII,
+ MachineInstr::MIFlag Flag) {
+ unsigned Opc = MBBI->getOpcode();
+ MachineBasicBlock *MBB = MBBI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ DebugLoc DL = MBBI->getDebugLoc();
+ unsigned ImmIdx = MBBI->getNumOperands() - 1;
+ int Imm = MBBI->getOperand(ImmIdx).getImm();
+ MachineInstrBuilder MIB;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("No SEH Opcode for this instruction");
+ case AArch64::LDPDpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STPDpre: {
+ unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
+ .addImm(Reg0)
+ .addImm(Reg1)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDPXpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STPXpre: {
+ unsigned Reg0 = MBBI->getOperand(1).getReg();
+ unsigned Reg1 = MBBI->getOperand(2).getReg();
+ if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ else
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
+ .addImm(RegInfo->getSEHRegNum(Reg0))
+ .addImm(RegInfo->getSEHRegNum(Reg1))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDRDpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STRDpre: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
+ .addImm(Reg)
+ .addImm(Imm)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDRXpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STRXpre: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
+ .addImm(Reg)
+ .addImm(Imm)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STPDi:
+ case AArch64::LDPDi: {
+ unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
+ .addImm(Reg0)
+ .addImm(Reg1)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STPXi:
+ case AArch64::LDPXi: {
+ unsigned Reg0 = MBBI->getOperand(0).getReg();
+ unsigned Reg1 = MBBI->getOperand(1).getReg();
+ if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ else
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
+ .addImm(RegInfo->getSEHRegNum(Reg0))
+ .addImm(RegInfo->getSEHRegNum(Reg1))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STRXui:
+ case AArch64::LDRXui: {
+ int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
+ .addImm(Reg)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STRDui:
+ case AArch64::LDRDui: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
+ .addImm(Reg)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ }
+ auto I = MBB->insertAfter(MBBI, MIB);
+ return I;
+}
+
+// Fix up the SEH opcode associated with the save/restore instruction.
+static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
+ unsigned LocalStackSize) {
+ MachineOperand *ImmOpnd = nullptr;
+ unsigned ImmIdx = MBBI->getNumOperands() - 1;
+ switch (MBBI->getOpcode()) {
+ default:
+ llvm_unreachable("Fix the offset in the SEH instruction");
+ case AArch64::SEH_SaveFPLR:
+ case AArch64::SEH_SaveRegP:
+ case AArch64::SEH_SaveReg:
+ case AArch64::SEH_SaveFRegP:
+ case AArch64::SEH_SaveFReg:
+ ImmOpnd = &MBBI->getOperand(ImmIdx);
+ break;
+ }
+ if (ImmOpnd)
+ ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
+}
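The SEH bookkeeping above is purely arithmetic: InsertSEH converts the store's scaled immediate into a byte offset for the .seh_save_* directive, and fixupSEHOpcode later folds the local stack size into that byte offset when the callee-save stores share the SP adjustment with the locals. A minimal standalone sketch of the same arithmetic follows; SEHByteOffset and ApplyLocalStackFixup are illustrative names, not LLVM API.

#include <cassert>
#include <cstdio>

// Byte offset carried by an SEH save_regp code for a paired X-register
// store "stp xA, xB, [sp, #Imm]"; the STP immediate is scaled by 8 bytes.
static int SEHByteOffset(int ScaledImm) { return ScaledImm * 8; }

// When the callee-save stores are combined with the local-stack allocation,
// the byte offset recorded in the unwind code grows by the local stack size.
static int ApplyLocalStackFixup(int SEHOffset, unsigned LocalStackSize) {
  return SEHOffset + (int)LocalStackSize;
}

int main() {
  // A pair saved at scaled immediate 2 sits at SP+16; with 32 bytes of locals
  // folded into the same SP bump, the unwind offset becomes 48.
  int Off = SEHByteOffset(2);
  assert(Off == 16);
  Off = ApplyLocalStackFixup(Off, 32);
  assert(Off == 48);
  std::printf("byte offset for save_regp: %d\n", Off);
  return 0;
}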
+
// Convert callee-save register save/restore instruction to do stack pointer
// decrement/increment to allocate/deallocate the callee-save stack area by
// converting store/load to use pre/post increment version.
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
+ const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
+ bool NeedsWinCFI, bool InProlog = true) {
// Ignore instructions that do not operate on SP, i.e. shadow call stack
- // instructions.
+ // instructions and associated CFI instruction.
while (MBBI->getOpcode() == AArch64::STRXpost ||
- MBBI->getOpcode() == AArch64::LDRXpre) {
- assert(MBBI->getOperand(0).getReg() != AArch64::SP);
+ MBBI->getOpcode() == AArch64::LDRXpre ||
+ MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
+ if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
+ assert(MBBI->getOperand(0).getReg() != AArch64::SP);
++MBBI;
}
-
unsigned NewOpc;
- bool NewIsUnscaled = false;
+ int Scale = 1;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
case AArch64::STPXi:
NewOpc = AArch64::STPXpre;
+ Scale = 8;
break;
case AArch64::STPDi:
NewOpc = AArch64::STPDpre;
+ Scale = 8;
+ break;
+ case AArch64::STPQi:
+ NewOpc = AArch64::STPQpre;
+ Scale = 16;
break;
case AArch64::STRXui:
NewOpc = AArch64::STRXpre;
- NewIsUnscaled = true;
break;
case AArch64::STRDui:
NewOpc = AArch64::STRDpre;
- NewIsUnscaled = true;
+ break;
+ case AArch64::STRQui:
+ NewOpc = AArch64::STRQpre;
break;
case AArch64::LDPXi:
NewOpc = AArch64::LDPXpost;
+ Scale = 8;
break;
case AArch64::LDPDi:
NewOpc = AArch64::LDPDpost;
+ Scale = 8;
+ break;
+ case AArch64::LDPQi:
+ NewOpc = AArch64::LDPQpost;
+ Scale = 16;
break;
case AArch64::LDRXui:
NewOpc = AArch64::LDRXpost;
- NewIsUnscaled = true;
break;
case AArch64::LDRDui:
NewOpc = AArch64::LDRDpost;
- NewIsUnscaled = true;
+ break;
+ case AArch64::LDRQui:
+ NewOpc = AArch64::LDRQpost;
break;
}
+ // Get rid of the SEH code associated with the old instruction.
+ if (NeedsWinCFI) {
+ auto SEH = std::next(MBBI);
+ if (AArch64InstrInfo::isSEHInstruction(*SEH))
+ SEH->eraseFromParent();
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
MIB.addReg(AArch64::SP, RegState::Define);
@@ -471,15 +667,16 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
"instruction!");
assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
- // Last operand is immediate offset that needs fixing.
- assert(CSStackSizeInc % 8 == 0);
- int64_t CSStackSizeIncImm = CSStackSizeInc;
- if (!NewIsUnscaled)
- CSStackSizeIncImm /= 8;
- MIB.addImm(CSStackSizeIncImm);
+ assert(CSStackSizeInc % Scale == 0);
+ MIB.addImm(CSStackSizeInc / Scale);
MIB.setMIFlags(MBBI->getFlags());
- MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
+ MIB.setMemRefs(MBBI->memoperands());
+
+ // Generate a new SEH code that corresponds to the new instruction.
+ if (NeedsWinCFI)
+ InsertSEH(*MIB, *TII,
+ InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
return std::prev(MBB.erase(MBBI));
}
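The conversion above swaps, for example, STPXi for STPXpre and rewrites the immediate in units of the access size: 8 bytes for X and D registers, 16 bytes for Q registers. A small standalone sketch of that immediate computation; PrePostIncImm is an illustrative helper name, not LLVM code.

#include <cassert>

// Scaled immediate for the pre/post-indexed form when the callee-save area
// is allocated or deallocated by CSStackSizeInc bytes.
static int PrePostIncImm(int CSStackSizeInc, int Scale) {
  assert(CSStackSizeInc % Scale == 0 && "SP bump must be a multiple of scale");
  return CSStackSizeInc / Scale;
}

int main() {
  // stp x29, x30, [sp, #-16]!  -- 16-byte callee-save area, X registers.
  assert(PrePostIncImm(-16, /*Scale=*/8) == -2);
  // stp q8, q9, [sp, #-32]!    -- 32-byte callee-save area, Q registers.
  assert(PrePostIncImm(-32, /*Scale=*/16) == -2);
  return 0;
}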
@@ -487,22 +684,43 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
- unsigned LocalStackSize) {
+ unsigned LocalStackSize,
+ bool NeedsWinCFI) {
+ if (AArch64InstrInfo::isSEHInstruction(MI))
+ return;
+
unsigned Opc = MI.getOpcode();
// Ignore instructions that do not operate on SP, i.e. shadow call stack
- // instructions.
- if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) {
- assert(MI.getOperand(0).getReg() != AArch64::SP);
+ // instructions and associated CFI instruction.
+ if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
+ Opc == AArch64::CFI_INSTRUCTION) {
+ if (Opc != AArch64::CFI_INSTRUCTION)
+ assert(MI.getOperand(0).getReg() != AArch64::SP);
return;
}
- (void)Opc;
- assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
- Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
- Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
- Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
- "Unexpected callee-save save/restore opcode!");
+ unsigned Scale;
+ switch (Opc) {
+ case AArch64::STPXi:
+ case AArch64::STRXui:
+ case AArch64::STPDi:
+ case AArch64::STRDui:
+ case AArch64::LDPXi:
+ case AArch64::LDRXui:
+ case AArch64::LDPDi:
+ case AArch64::LDRDui:
+ Scale = 8;
+ break;
+ case AArch64::STPQi:
+ case AArch64::STRQui:
+ case AArch64::LDPQi:
+ case AArch64::LDRQui:
+ Scale = 16;
+ break;
+ default:
+ llvm_unreachable("Unexpected callee-save save/restore opcode!");
+ }
unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
@@ -510,8 +728,16 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
// Last operand is immediate offset that needs fixing.
MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
// All generated opcodes have scaled offsets.
- assert(LocalStackSize % 8 == 0);
- OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
+ assert(LocalStackSize % Scale == 0);
+ OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
+
+ if (NeedsWinCFI) {
+ auto MBBI = std::next(MachineBasicBlock::iterator(MI));
+ assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
+ assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
+ "Expecting a SEH instruction");
+ fixupSEHOpcode(MBBI, LocalStackSize);
+ }
}
static void adaptForLdStOpt(MachineBasicBlock &MBB,
@@ -546,6 +772,23 @@ static void adaptForLdStOpt(MachineBasicBlock &MBB,
//
}
+static bool ShouldSignWithAKey(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("sign-return-address-key"))
+ return true;
+
+ const StringRef Key =
+ F.getFnAttribute("sign-return-address-key").getValueAsString();
+ assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
+ return Key.equals_lower("a_key");
+}
+
+static bool needsWinCFI(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ F.needsUnwindTableEntry();
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -556,8 +799,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
+ bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) &&
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool HasFP = hasFP(MF);
+ bool NeedsWinCFI = needsWinCFI(MF);
+ MF.setHasWinCFI(NeedsWinCFI);
+ bool IsFunclet = MBB.isEHFuncletEntry();
// At this point, we're going to decide whether or not the function uses a
// redzone. In most cases, the function doesn't have a redzone so let's
@@ -568,18 +815,41 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;
+ if (ShouldSignReturnAddress(MF)) {
+ if (ShouldSignWithAKey(MF))
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
+ .setMIFlag(MachineInstr::FrameSetup);
+ else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
- int NumBytes = (int)MFI.getStackSize();
+ // getStackSize() includes all the locals in its size calculation. We don't
+ // include these locals when computing the stack size of a funclet, as they
+ // are allocated in the parent's stack frame and accessed via the frame
+ // pointer from the funclet. We only save the callee saved registers in the
+ // funclet, which are really the callee saved registers of the parent
+ // function, including the funclet.
+ int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
+ : (int)MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
-
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
-
if (!NumBytes)
return;
// REDZONE: If the stack size is less than 128 bytes, we don't need
@@ -589,36 +859,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
-
- // Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
- // Encode the stack size of the leaf function.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
+ if (!NeedsWinCFI) {
+ // Label used to tie together the PROLOG_LABEL and the MachineMoves.
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
+ // Encode the stack size of the leaf function.
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
}
+
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+
return;
}
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ // Var args are accounted for in the containing function, so don't
+ // include them for funclets.
+ unsigned FixedObject = (IsWin64 && !IsFunclet) ?
+ alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
-
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
if (CombineSPBump) {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
NumBytes = 0;
} else if (PrologueSaveSize != 0) {
- MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
- -PrologueSaveSize);
+ MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -629,9 +907,21 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock::iterator End = MBB.end();
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
if (CombineSPBump)
- fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
+ fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
+ NeedsWinCFI);
++MBBI;
}
+
+ // The code below is not applicable to funclets. We have emitted all the SEH
+ // opcodes that we needed to emit. The FP and BP belong to the containing
+ // function.
+ if (IsFunclet) {
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ return;
+ }
+
if (HasFP) {
// Only set up FP if we actually need to. Frame pointer is fp =
// sp - fixedobject - 16.
@@ -644,24 +934,58 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
uint32_t NumWords = NumBytes >> 4;
-
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
+ // exceed this amount. We need to move at most 2^24 - 1 into x15.
+ // This is at most two instructions, MOVZ followed by MOVK.
+ // TODO: Fix to use multiple stack alloc unwind codes for stacks
+ // exceeding 256MB in size.
+ if (NumBytes >= (1 << 28))
+ report_fatal_error("Stack size cannot exceed 256MB for stack "
+ "unwinding purposes");
+
+ uint32_t LowNumWords = NumWords & 0xFFFF;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
+ .addImm(LowNumWords)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ if ((NumWords & 0xFFFF0000) != 0) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
+ .addReg(AArch64::X15)
+ .addImm((NumWords & 0xFFFF0000) >> 16) // High half
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
switch (MF.getTarget().getCodeModel()) {
+ case CodeModel::Tiny:
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
.addExternalSymbol("__chkstk")
.addReg(AArch64::X15, RegState::Implicit)
+ .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
break;
case CodeModel::Large:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
@@ -669,11 +993,20 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addExternalSymbol("__chkstk")
.addExternalSymbol("__chkstk")
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+ .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
break;
}
@@ -682,6 +1015,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
NumBytes = 0;
}
@@ -701,7 +1038,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
if (NeedsRealignment) {
const unsigned Alignment = MFI.getMaxAlignment();
@@ -724,6 +1061,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(scratchSPReg, RegState::Kill)
.addImm(andMaskEncoded);
AFI->setStackRealigned(true);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes & andMaskEncoded)
+ .setMIFlag(MachineInstr::FrameSetup);
}
}
@@ -737,8 +1078,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (RegInfo->hasBasePointer(MF)) {
TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
false);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
}
+ // The very last FrameSetup instruction indicates the end of prologue. Emit a
+ // SEH opcode indicating the prologue end.
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+
if (needsFrameMoves) {
const DataLayout &TD = MF.getDataLayout();
const int StackGrowth = -TD.getPointerSize(0);
@@ -832,6 +1182,46 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
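In the stack-probe path of the prologue above, x15 receives the allocation size in 16-byte units, materialized as a MOVZ of the low 16 bits plus an optional MOVK of the high 16 bits; the 256MB cap comes from the alloc_l unwind code. A standalone sketch of that split, where SplitProbeWords is an illustrative name and not part of LLVM.

#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

// Split NumBytes into the MOVZ/MOVK immediates used to load NumBytes/16 into
// x15 before calling __chkstk. Returns nullopt when the unwind code cannot
// describe the allocation (256MB or more).
static std::optional<std::pair<uint16_t, uint16_t>>
SplitProbeWords(uint64_t NumBytes) {
  if (NumBytes >= (1ULL << 28))
    return std::nullopt;
  uint32_t NumWords = (uint32_t)(NumBytes >> 4);
  uint16_t Low = NumWords & 0xFFFF;          // MOVZ x15, #Low
  uint16_t High = (NumWords >> 16) & 0xFFFF; // MOVK x15, #High, lsl #16 (if nonzero)
  return std::make_pair(Low, High);
}

int main() {
  auto Imms = SplitProbeWords(0x12340);       // 74560 bytes -> 4660 words
  assert(Imms && Imms->first == 0x1234 && Imms->second == 0);
  assert(!SplitProbeWords(1ULL << 28));       // 256MB: rejected, as in emitPrologue
  return 0;
}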
+static void InsertReturnAddressAuth(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (!ShouldSignReturnAddress(MF))
+ return;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc DL;
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ // The AUTIASP instruction assembles to a hint instruction before v8.3a so
+ // this instruction can safely be used for any v8a architecture.
+ // From v8.3a onwards there are optimised authenticate LR and return
+ // instructions, namely RETA{A,B}, that can be used instead.
+ if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() &&
+ MBBI->getOpcode() == AArch64::RET_ReallyLR) {
+ BuildMI(MBB, MBBI, DL,
+ TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB))
+ .copyImplicitOps(*MBBI);
+ MBB.erase(MBBI);
+ } else {
+ BuildMI(
+ MBB, MBBI, DL,
+ TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+static bool isFuncletReturnInstr(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case AArch64::CATCHRET:
+ case AArch64::CLEANUPRET:
+ return true;
+ }
+}
+
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -840,14 +1230,21 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool IsTailCallReturn = false;
+ bool NeedsWinCFI = needsWinCFI(MF);
+ bool IsFunclet = false;
+
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
unsigned RetOpcode = MBBI->getOpcode();
IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
- RetOpcode == AArch64::TCRETURNri;
+ RetOpcode == AArch64::TCRETURNri ||
+ RetOpcode == AArch64::TCRETURNriBTI;
+ IsFunclet = isFuncletReturnInstr(*MBBI);
}
- int NumBytes = MFI.getStackSize();
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
+ : MFI.getStackSize();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
@@ -899,25 +1296,38 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
// it as the 2nd argument of AArch64ISD::TC_RETURN.
+ auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
+
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ // Var args are accounted for in the containing function, so don't
+ // include them for funclets.
+ unsigned FixedObject =
+ (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
uint64_t AfterCSRPopSize = ArgumentPopSize;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
+ // We cannot rely on the local stack size set in emitPrologue if the function
+ // has funclets, as funclets have different local stack size requirements, and
+ // the current value set in emitPrologue may be that of the containing
+ // function.
+ if (MF.hasEHFunclets())
+ AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
// Assume we can't combine the last pop with the sp restore.
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
+ while (AArch64InstrInfo::isSEHInstruction(*Pop))
+ Pop = std::prev(Pop);
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
// If the offset is 0, convert it to a post-index ldp.
- if (OffsetOp.getImm() == 0) {
- convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII,
- PrologueSaveSize);
- } else {
+ if (OffsetOp.getImm() == 0)
+ convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, false);
+ else {
// If not, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
// adjustment *before* the CSR pops to the adjustment *after* the CSR
@@ -937,14 +1347,23 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
++LastPopI;
break;
} else if (CombineSPBump)
- fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
+ fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
+ NeedsWinCFI);
}
+ if (NeedsWinCFI)
+ BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
+ .setMIFlag(MachineInstr::FrameDestroy);
+
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
- NumBytes + AfterCSRPopSize, TII,
- MachineInstr::FrameDestroy);
+ NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL,
+ TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
return;
}
@@ -972,9 +1391,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackRestoreBytes, TII, MachineInstr::FrameDestroy);
- if (Done)
+ StackRestoreBytes, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI);
+ if (Done) {
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL,
+ TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
return;
+ }
NumBytes = 0;
}
@@ -983,13 +1408,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
- if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
+ if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned()))
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-AFI->getCalleeSavedStackSize() + 16, TII,
- MachineInstr::FrameDestroy);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
- MachineInstr::FrameDestroy);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
@@ -1010,8 +1435,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
- AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
+ AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI);
}
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1084,6 +1513,14 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
UseFP = true;
+ } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
+ // Funclets access the locals contained in the parent's stack frame
+ // via the frame pointer, so we have to use the FP in the parent
+ // function.
+ assert(
+ Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
+ "Funclets should only be present on Win64");
+ UseFP = true;
} else {
// We have the choice between FP and (SP or BP).
if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
@@ -1136,6 +1573,23 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
Attrs.hasAttrSomewhere(Attribute::SwiftError));
}
+static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
+ bool NeedsWinCFI) {
+ // If we are generating register pairs for a Windows function that requires
+ // EH support, then pair consecutive registers only. There are no unwind
+ // opcodes for saves/restores of non-consecutive register pairs.
+ // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
+ // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+
+ // TODO: LR can be paired with any register. We don't support this yet in
+ // the MCLayer. We need to add support for the save_lrpair unwind code.
+ if (!NeedsWinCFI)
+ return false;
+ if (Reg2 == Reg1 + 1)
+ return false;
+ return true;
+}
+
namespace {
struct RegPairInfo {
@@ -1143,7 +1597,7 @@ struct RegPairInfo {
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
- bool IsGPR;
+ enum RegType { GPR, FPR64, FPR128 } Type;
RegPairInfo() = default;
@@ -1160,6 +1614,7 @@ static void computeCalleeSaveRegisterPairs(
if (CSI.empty())
return;
+ bool NeedsWinCFI = needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineFrameInfo &MFI = MF.getFrameInfo();
CallingConv::ID CC = MF.getFunction().getCallingConv();
@@ -1172,28 +1627,50 @@ static void computeCalleeSaveRegisterPairs(
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int Offset = AFI->getCalleeSavedStackSize();
-
+ // On Linux, we will have at most one non-paired register. On Windows
+ // with CFI, we can have multiple unpaired registers in order to utilize the
+ // available unwind codes. This flag ensures that the alignment fixup is done
+ // only once, as intended.
+ bool FixupDone = false;
for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
- assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
- AArch64::FPR64RegClass.contains(RPI.Reg1));
- RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
+ if (AArch64::GPR64RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::GPR;
+ else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::FPR64;
+ else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::FPR128;
+ else
+ llvm_unreachable("Unsupported register class.");
// Add the next reg to the pair if it is in the same register class.
if (i + 1 < Count) {
unsigned NextReg = CSI[i + 1].getReg();
- if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
- (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
- RPI.Reg2 = NextReg;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ if (AArch64::GPR64RegClass.contains(NextReg) &&
+ !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ RPI.Reg2 = NextReg;
+ break;
+ case RegPairInfo::FPR64:
+ if (AArch64::FPR64RegClass.contains(NextReg) &&
+ !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ RPI.Reg2 = NextReg;
+ break;
+ case RegPairInfo::FPR128:
+ if (AArch64::FPR128RegClass.contains(NextReg))
+ RPI.Reg2 = NextReg;
+ break;
+ }
}
// If either of the registers to be saved is the lr register, it means that
// we also need to save lr in the shadow call stack.
if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
- if (!MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
+ if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
report_fatal_error("Must reserve x18 to use shadow call stack");
NeedShadowCallStackProlog = true;
}
@@ -1219,17 +1696,22 @@ static void computeCalleeSaveRegisterPairs(
RPI.FrameIdx = CSI[i].getFrameIdx();
- if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
- // Round up size of non-pair to pair size if we need to pad the
- // callee-save area to ensure 16-byte alignment.
- Offset -= 16;
+ int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
+ Offset -= RPI.isPaired() ? 2 * Scale : Scale;
+
+ // Round up size of non-pair to pair size if we need to pad the
+ // callee-save area to ensure 16-byte alignment.
+ if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
+ RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
+ FixupDone = true;
+ Offset -= 8;
+ assert(Offset % 16 == 0);
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
MFI.setObjectAlignment(RPI.FrameIdx, 16);
- AFI->setCalleeSaveStackHasFreeSpace(true);
- } else
- Offset -= RPI.isPaired() ? 16 : 8;
- assert(Offset % 8 == 0);
- RPI.Offset = Offset / 8;
+ }
+
+ assert(Offset % Scale == 0);
+ RPI.Offset = Offset / Scale;
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
"Offset out of bounds for LDP/STP immediate");
@@ -1245,6 +1727,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ bool NeedsWinCFI = needsWinCFI(MF);
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
@@ -1262,6 +1745,27 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
.addImm(8)
.setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
+ // Emit a CFI instruction that causes 8 to be subtracted from the value of
+ // x18 when unwinding past this frame.
+ static const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ 18, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+ static_cast<char>(-8) & 0x7f, // addend (sleb128)
+ };
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, CFIInst));
+ BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
// This instruction also makes x18 live-in to the entry block.
MBB.addLiveIn(AArch64::X18);
}
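The escape emitted above hand-assembles a DW_CFA_val_expression whose DW_OP_breg18 addend is the single SLEB128 byte for -8, which is exactly what static_cast<char>(-8) & 0x7f evaluates to (0x78). A minimal generic SLEB128 encoder confirming that byte; this is a sketch, not LLVM's own LEB128 utility.

#include <cassert>
#include <cstdint>
#include <vector>

// Generic SLEB128 encoding of a signed value, least-significant group first.
static std::vector<uint8_t> encodeSLEB128(int64_t Value) {
  std::vector<uint8_t> Out;
  bool More = true;
  while (More) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    // Stop once the remaining bits are pure sign extension of the last group.
    if ((Value == 0 && !(Byte & 0x40)) || (Value == -1 && (Byte & 0x40)))
      More = false;
    else
      Byte |= 0x80;
    Out.push_back(Byte);
  }
  return Out;
}

int main() {
  // -8 encodes to the single byte 0x78, matching static_cast<char>(-8) & 0x7f.
  auto Bytes = encodeSLEB128(-8);
  assert(Bytes.size() == 1 && Bytes[0] == 0x78);
  return 0;
}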
@@ -1283,16 +1787,41 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
- if (RPI.IsGPR)
- StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
- else
- StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+ unsigned Size, Align;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR64:
+ StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR128:
+ StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
+ Size = 16;
+ Align = 16;
+ break;
+ }
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
+ assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
+ "Windows unwdinding requires a consecutive (FP,LR) pair");
+ // Windows unwind codes require consecutive registers if registers are
+ // paired. Make the switch here, so that the code below will save (x,x+1)
+ // and not (x+1,x).
+ unsigned FrameIdxReg1 = RPI.FrameIdx;
+ unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
+ if (NeedsWinCFI && RPI.isPaired()) {
+ std::swap(Reg1, Reg2);
+ std::swap(FrameIdxReg1, FrameIdxReg2);
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (!MRI.isReserved(Reg1))
MBB.addLiveIn(Reg1);
@@ -1301,16 +1830,20 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
- MachineMemOperand::MOStore, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
+ MachineMemOperand::MOStore, Size, Align));
}
MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
- .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
+ .addImm(RPI.Offset) // [sp, #offset*scale],
+ // where factor*scale is implicit
.setMIFlag(MachineInstr::FrameSetup);
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
- MachineMemOperand::MOStore, 8, 8));
+ MachinePointerInfo::getFixedStack(MF,FrameIdxReg1),
+ MachineMemOperand::MOStore, Size, Align));
+ if (NeedsWinCFI)
+ InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+
}
return true;
}
@@ -1323,6 +1856,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
+ bool NeedsWinCFI = needsWinCFI(MF);
if (MI != MBB.end())
DL = MI->getDebugLoc();
@@ -1344,32 +1878,57 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
// ldp x22, x21, [sp, #0] // addImm(+0)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
- if (RPI.IsGPR)
- LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
- else
- LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+ unsigned Size, Align;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR64:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR128:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
+ Size = 16;
+ Align = 16;
+ break;
+ }
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
+ // Windows unwind codes require consecutive registers if registers are
+ // paired. Make the switch here, so that the code below will restore (x,x+1)
+ // and not (x+1,x).
+ unsigned FrameIdxReg1 = RPI.FrameIdx;
+ unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
+ if (NeedsWinCFI && RPI.isPaired()) {
+ std::swap(Reg1, Reg2);
+ std::swap(FrameIdxReg1, FrameIdxReg2);
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
if (RPI.isPaired()) {
MIB.addReg(Reg2, getDefRegState(true));
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
- MachineMemOperand::MOLoad, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
+ MachineMemOperand::MOLoad, Size, Align));
}
MIB.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
- .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
+ .addImm(RPI.Offset) // [sp, #offset*scale]
+ // where factor*scale is implicit
.setMIFlag(MachineInstr::FrameDestroy);
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
- MachineMemOperand::MOLoad, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
+ MachineMemOperand::MOLoad, Size, Align));
+ if (NeedsWinCFI)
+ InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
};
-
if (ReverseCSRRestoreSeq)
for (const RegPairInfo &RPI : reverse(RegPairs))
EmitMI(RPI);
@@ -1406,30 +1965,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
MachineFrameInfo &MFI = MF.getFrameInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
? RegInfo->getBaseRegister()
: (unsigned)AArch64::NoRegister;
- unsigned SpillEstimate = SavedRegs.count();
- for (unsigned i = 0; CSRegs[i]; ++i) {
- unsigned Reg = CSRegs[i];
- unsigned PairedReg = CSRegs[i ^ 1];
- if (Reg == BasePointerReg)
- SpillEstimate++;
- if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
- SpillEstimate++;
- }
- SpillEstimate += 2; // Conservatively include FP+LR in the estimate
- unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
-
- // The frame record needs to be created by saving the appropriate registers
- if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
- SavedRegs.set(AArch64::FP);
- SavedRegs.set(AArch64::LR);
- }
-
unsigned ExtraCSSpill = 0;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
@@ -1453,7 +1994,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
- if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
+ if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
+ !SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
!RegInfo->isReservedReg(MF, PairedReg))
@@ -1461,6 +2003,24 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Calculates the callee saved stack size.
+ unsigned CSStackSize = 0;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned Reg : SavedRegs.set_bits())
+ CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
+
+ // Save number of saved regs, so we can easily update CSStackSize later.
+ unsigned NumSavedRegs = SavedRegs.count();
+
+ // The frame record needs to be created by saving the appropriate registers
+ unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
+ if (hasFP(MF) ||
+ windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
+ SavedRegs.set(AArch64::FP);
+ SavedRegs.set(AArch64::LR);
+ }
+
LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
for (unsigned Reg
: SavedRegs.set_bits()) dbgs()
@@ -1468,15 +2028,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned NumRegsSpilled = SavedRegs.count();
- bool CanEliminateFrame = NumRegsSpilled == 0;
+ bool CanEliminateFrame = SavedRegs.count() == 0;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
- unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
- LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
- bool BigStack = (CFSize > EstimatedStackSizeLimit);
+ bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
@@ -1497,7 +2054,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (produceCompactUnwindFrame(MF))
SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = UnspilledCSGPRPaired;
- NumRegsSpilled = SavedRegs.count();
}
// If we didn't find an extra callee-saved register to spill, create
@@ -1514,9 +2070,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Adding the size of additional 64bit GPR saves.
+ CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
+ unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
+ LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
+ << EstimatedStackSize + AlignedCSStackSize
+ << " bytes.\n");
+
// Round up to register pair alignment to avoid additional SP adjustment
// instructions.
- AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
+ AFI->setCalleeSavedStackSize(AlignedCSStackSize);
+ AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
}
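determineCalleeSaves now sums the actual register sizes, rounds the total up to the 16-byte pair alignment, and records whether the rounding introduced an unused 8-byte slot, which enableStackSlotScavenging later reuses. A worked standalone sketch of that sizing; AlignCalleeSaveSize and the sample register counts are illustrative assumptions.

#include <cassert>

struct CalleeSaveSizing {
  unsigned AlignedSize; // value given to setCalleeSavedStackSize
  bool HasFreeSpace;    // value given to setCalleeSaveStackHasFreeSpace
};

// Round the raw callee-save byte count up to 16-byte register-pair alignment
// and note whether padding was introduced.
static CalleeSaveSizing AlignCalleeSaveSize(unsigned CSStackSize) {
  unsigned Aligned = (CSStackSize + 15) & ~15u;
  return {Aligned, Aligned != CSStackSize};
}

int main() {
  // Five 8-byte GPR saves occupy 40 bytes; the area is padded to 48 and the
  // spare 8-byte slot is available for stack slot scavenging.
  CalleeSaveSizing S = AlignCalleeSaveSize(5 * 8);
  assert(S.AlignedSize == 48 && S.HasFreeSpace);
  // Two 16-byte Q-register saves are already aligned; no free slot.
  S = AlignCalleeSaveSize(2 * 16);
  assert(S.AlignedSize == 32 && !S.HasFreeSpace);
  return 0;
}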
bool AArch64FrameLowering::enableStackSlotScavenging(
@@ -1524,3 +2088,69 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return AFI->hasCalleeSaveStackFreeSpace();
}
+
+void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ // If this function isn't doing Win64-style C++ EH, we don't need to do
+ // anything.
+ if (!MF.hasEHFunclets())
+ return;
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
+
+ MachineBasicBlock &MBB = MF.front();
+ auto MBBI = MBB.begin();
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ ++MBBI;
+
+ if (MBBI->isTerminator())
+ return;
+
+ // Create an UnwindHelp object.
+ int UnwindHelpFI =
+ MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
+ EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
+ // We need to store -2 into the UnwindHelp object at the start of the
+ // function.
+ DebugLoc DL;
+ RS->enterBasicBlock(MBB);
+ unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0);
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
+ .addReg(DstReg, getKillRegState(true))
+ .addFrameIndex(UnwindHelpFI)
+ .addImm(0);
+}
+
+/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
+/// the update. This is easily retrieved as it is exactly the offset that is set
+/// in processFunctionBeforeFrameFinalized.
+int AArch64FrameLowering::getFrameIndexReferencePreferSP(
+ const MachineFunction &MF, int FI, unsigned &FrameReg,
+ bool IgnoreSPUpdates) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
+ << MFI.getObjectOffset(FI) << "\n");
+ FrameReg = AArch64::SP;
+ return MFI.getObjectOffset(FI);
+}
+
+/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
+/// the parent's frame pointer
+unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
+ const MachineFunction &MF) const {
+ return 0;
+}
+
+/// Funclets only need to account for space for the callee saved registers,
+/// as the locals are accounted for in the parent's stack frame.
+unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
+ const MachineFunction &MF) const {
+ // This is the size of the pushed CSRs.
+ unsigned CSSize =
+ MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
+ // This is the amount of stack a funclet needs to allocate.
+ return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
+ getStackAlignment());
+}
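getWinEHFuncletFrameSize gives a funclet only its callee-save area plus the outgoing call frame, rounded to the stack alignment, since the parent's locals are reached through the frame pointer instead. A worked sketch of that computation; the 16-byte alignment and the sample sizes are assumptions for illustration.

#include <cassert>

// Funclet frame size = callee-save area + maximum outgoing call frame,
// rounded up to the stack alignment.
static unsigned FuncletFrameSize(unsigned CalleeSavedStackSize,
                                 unsigned MaxCallFrameSize,
                                 unsigned StackAlign = 16) {
  unsigned Raw = CalleeSavedStackSize + MaxCallFrameSize;
  return (Raw + StackAlign - 1) & ~(StackAlign - 1);
}

int main() {
  // 80 bytes of saved registers plus 24 bytes of outgoing arguments round up
  // to a 112-byte funclet frame; the parent's locals contribute nothing.
  assert(FuncletFrameSize(80, 24) == 112);
  return 0;
}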
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 104e52b5f1f3..0d0385acf46e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -69,6 +69,17 @@ public:
bool enableStackSlotScavenging(const MachineFunction &MF) const override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
+ unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override;
+
+ unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
+
+ int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
+ unsigned &FrameReg,
+ bool IgnoreSPUpdates) const override;
+
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c1a9ee333b62..fc9855f6a0da 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1208,9 +1208,8 @@ void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
CurDAG->RemoveDeadNode(N);
}
@@ -1261,9 +1260,8 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
}
@@ -1441,9 +1439,8 @@ void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
}
@@ -1476,9 +1473,8 @@ void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
}
@@ -2091,8 +2087,7 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
(void)BitWidth;
assert(BitWidth == 32 || BitWidth == 64);
- KnownBits Known;
- CurDAG->computeKnownBits(Op, Known);
+ KnownBits Known = CurDAG->computeKnownBits(Op);
// Non-zero in the sense that they're not provably zero, which is the key
// point if we want to use this value
@@ -2171,8 +2166,7 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
// Compute the Known Zero for the AND as this allows us to catch more general
// cases than just looking for AND with imm.
- KnownBits Known;
- CurDAG->computeKnownBits(And, Known);
+ KnownBits Known = CurDAG->computeKnownBits(And);
// Non-zero in the sense that they're not provably zero, which is the key
// point if we want to use this value.
@@ -2313,8 +2307,7 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
// This allows to catch more general case than just looking for
// AND with imm. Indeed, simplify-demanded-bits may have removed
// the AND instruction because it proves it was useless.
- KnownBits Known;
- CurDAG->computeKnownBits(OrOpd1Val, Known);
+ KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
// Check if there is enough room for the second operand to appear
// in the first one
@@ -2690,7 +2683,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
unsigned Reg = PMapper->Encoding;
uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
unsigned State;
- if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
+ if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
assert(Immed < 2 && "Bad imm");
State = AArch64::MSRpstateImm1;
} else {
@@ -2751,9 +2744,8 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
Opcode, SDLoc(N),
CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
@@ -2923,9 +2915,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
MVT::Other, MemAddr, Chain);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
- cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp =
+ cast<MemIntrinsicSDNode>(Node)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
ReplaceNode(Node, Ld);
return;
}
@@ -2944,9 +2936,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp =
+ cast<MemIntrinsicSDNode>(Node)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(Node, St);
return;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cfc7aa96d31f..e01ca14d7f63 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -187,7 +187,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
@@ -333,36 +333,38 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FREM, MVT::v4f16, Promote);
- setOperationAction(ISD::FREM, MVT::v8f16, Promote);
+ setOperationAction(ISD::FREM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FREM, MVT::v8f16, Expand);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
- setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
- setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
- setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
if (!Subtarget->hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
@@ -385,8 +387,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
@@ -450,8 +452,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
- setOperationAction(ISD::FMINNAN, Ty, Legal);
- setOperationAction(ISD::FMAXNAN, Ty, Legal);
+ setOperationAction(ISD::FMINIMUM, Ty, Legal);
+ setOperationAction(ISD::FMAXIMUM, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
@@ -463,8 +465,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -714,8 +716,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
- setOperationAction(ISD::MULHS, VT, Custom);
- setOperationAction(ISD::MULHU, VT, Custom);
+ setOperationAction(ISD::MULHS, VT, Legal);
+ setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
@@ -792,9 +794,9 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
for (MVT InnerVT : MVT::all_valuetypes())
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
- // CNT supports only B element sizes.
+ // CNT supports only B element sizes, so UADDLP is used afterwards to widen.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
- setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
@@ -816,8 +818,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
if (VT.isFloatingPoint() &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
- for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
- ISD::FMINNUM, ISD::FMAXNUM})
+ for (unsigned Opcode :
+ {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
setOperationAction(Opcode, VT, Legal);
if (Subtarget->isLittleEndian()) {
@@ -993,8 +995,8 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
break;
case AArch64ISD::CSEL: {
KnownBits Known2;
- DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
- DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
@@ -1086,6 +1088,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::FIRST_NUMBER: break;
case AArch64ISD::CALL: return "AArch64ISD::CALL";
case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
+ case AArch64ISD::ADR: return "AArch64ISD::ADR";
case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
@@ -1272,6 +1275,20 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
return EndBB;
}
+MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
+ assert(!isAsynchronousEHPersonality(classifyEHPersonality(
+ BB->getParent()->getFunction().getPersonalityFn())) &&
+ "SEH does not use catchret!");
+ return BB;
+}
+
+MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
+ MI.eraseFromParent();
+ return BB;
+}
+
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
@@ -1287,6 +1304,11 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
+
+ case AArch64::CATCHRET:
+ return EmitLoweredCatchRet(MI, BB);
+ case AArch64::CATCHPAD:
+ return EmitLoweredCatchPad(MI, BB);
}
}
@@ -1459,6 +1481,21 @@ static bool isLegalArithImmed(uint64_t C) {
return IsLegal;
}
+// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
+// the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
+// can be set differently by this operation. It comes down to whether
+// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
+// everything is fine. If not then the optimization is wrong. Thus general
+// comparisons are only valid if op2 != 0.
+//
+// So, finally, the only LLVM-native comparisons that don't mention C and V
+// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
+// the absence of information about op2.
+static bool isCMN(SDValue Op, ISD::CondCode CC) {
+ return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE);
+}
+
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
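
For reference, the flag behaviour the comment above relies on can be checked with a small standalone C++ model of NZCV (not LLVM code; the flag formulas follow the usual AArch64 definitions):

    #include <cassert>
    #include <cstdint>

    struct NZCV { bool N, Z, C, V; };

    // Flags of SUBS (a - b), using the usual AArch64 definitions.
    static NZCV subs(uint64_t a, uint64_t b) {
      uint64_t r = a - b;
      return { (int64_t)r < 0, r == 0, a >= b,
               (int64_t)((a ^ b) & (a ^ r)) < 0 };
    }

    // Flags of ADDS (a + b).
    static NZCV adds(uint64_t a, uint64_t b) {
      uint64_t r = a + b;
      return { (int64_t)r < 0, r == 0, r < a,
               (int64_t)(~(a ^ b) & (a ^ r)) < 0 };
    }

    int main() {
      // CMP a, (0 - b) and CMN a, b produce the same result value, so N and Z
      // (hence EQ/NE) always agree ...
      for (uint64_t a : {0ull, 1ull, 42ull, ~0ull})
        for (uint64_t b : {0ull, 1ull, 42ull, ~0ull}) {
          NZCV cmp = subs(a, 0 - b), cmn = adds(a, b);
          assert(cmp.N == cmn.N && cmp.Z == cmn.Z);
        }
      // ... but C can differ when b == 0: CMP x, #0 sets C, CMN x, #0 does not.
      assert(subs(5, 0).C && !adds(5, 0).C);
      return 0;
    }
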
@@ -1481,20 +1518,15 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
- if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on
- // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
- // can be set differently by this operation. It comes down to whether
- // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
- // everything is fine. If not then the optimization is wrong. Thus general
- // comparisons are only valid if op2 != 0.
-
- // So, finally, the only LLVM-native comparisons that don't mention C and V
- // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
- // the absence of information about op2.
+ if (isCMN(RHS, CC)) {
+ // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
+ } else if (isCMN(LHS, CC)) {
+ // As we are looking for EQ/NE compares, the operands can be commuted; can
+ // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
+ Opcode = AArch64ISD::ADDS;
+ LHS = LHS.getOperand(1);
} else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
!isUnsignedIntSetCC(CC)) {
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
@@ -1796,6 +1828,42 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
/// @}
+/// Returns how profitable it is to fold a comparison's operand's shift and/or
+/// extension operations.
+static unsigned getCmpOperandFoldingProfit(SDValue Op) {
+ auto isSupportedExtend = [&](SDValue V) {
+ if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
+ return true;
+
+ if (V.getOpcode() == ISD::AND)
+ if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ uint64_t Mask = MaskCst->getZExtValue();
+ return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
+ }
+
+ return false;
+ };
+
+ if (!Op.hasOneUse())
+ return 0;
+
+ if (isSupportedExtend(Op))
+ return 1;
+
+ unsigned Opc = Op.getOpcode();
+ if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
+ if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ uint64_t Shift = ShiftCst->getZExtValue();
+ if (isSupportedExtend(Op.getOperand(0)))
+ return (Shift <= 4) ? 2 : 1;
+ EVT VT = Op.getValueType();
+ if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
+ return 1;
+ }
+
+ return 0;
+}
+
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
@@ -1853,6 +1921,27 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
}
+
+ // Comparisons are canonicalized so that the RHS operand is simpler than the
+ // LHS one, the extreme case being when RHS is an immediate. However, AArch64
+ // can fold some shift+extend operations on the RHS operand, so swap the
+ // operands if that can be done.
+ //
+ // For example:
+ // lsl w13, w11, #1
+ // cmp w13, w12
+ // can be turned into:
+ // cmp w12, w11, lsl #1
+ if (!isa<ConstantSDNode>(RHS) ||
+ !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
+ SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
+
+ if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
+ std::swap(LHS, RHS);
+ CC = ISD::getSetCCSwappedOperands(CC);
+ }
+ }
+
SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
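
As a side note, swapping the operands above must also swap the condition code; a minimal sketch of that mapping, using a hypothetical Cond enum rather than the ISD predicates themselves:

    #include <cassert>

    // Equality is symmetric under operand swap; orderings flip direction.
    enum Cond { EQ, NE, LT, LE, GT, GE, ULT, ULE, UGT, UGE };

    static Cond swapOperands(Cond c) {
      switch (c) {
      case EQ: case NE: return c;   // symmetric
      case LT:  return GT;
      case LE:  return GE;
      case GT:  return LT;
      case GE:  return LE;
      case ULT: return UGT;
      case ULE: return UGE;
      case UGT: return ULT;
      case UGE: return ULE;
      }
      return c;
    }

    int main() {
      // (a < b) == (b > a), so "cmp a, b; b.lt" can become "cmp b, a; b.gt".
      assert(swapOperands(LT) == GT && swapOperands(UGE) == ULE);
      assert(swapOperands(EQ) == EQ);
      return 0;
    }
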
@@ -2619,66 +2708,6 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
-// Lower vector multiply high (ISD::MULHS and ISD::MULHU).
-static SDValue LowerMULH(SDValue Op, SelectionDAG &DAG) {
- // Multiplications are only custom-lowered for 128-bit vectors so that
- // {S,U}MULL{2} can be detected. Otherwise v2i64 multiplications are not
- // legal.
- EVT VT = Op.getValueType();
- assert(VT.is128BitVector() && VT.isInteger() &&
- "unexpected type for custom-lowering ISD::MULH{U,S}");
-
- SDValue V0 = Op.getOperand(0);
- SDValue V1 = Op.getOperand(1);
-
- SDLoc DL(Op);
-
- EVT ExtractVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
-
- // We turn (V0 mulhs/mulhu V1) to:
- //
- // (uzp2 (smull (extract_subvector (ExtractVT V128:V0, (i64 0)),
- // (extract_subvector (ExtractVT V128:V1, (i64 0))))),
- // (smull (extract_subvector (ExtractVT V128:V0, (i64 VMull2Idx)),
- // (extract_subvector (ExtractVT V128:V2, (i64 VMull2Idx))))))
- //
- // Where ExtractVT is a subvector with half number of elements, and
- // VMullIdx2 is the index of the middle element (the high part).
- //
- // The vector hight part extract and multiply will be matched against
- // {S,U}MULL{v16i8_v8i16,v8i16_v4i32,v4i32_v2i64} which in turn will
- // issue a {s}mull2 instruction.
- //
- // This basically multiply the lower subvector with '{s,u}mull', the high
- // subvector with '{s,u}mull2', and shuffle both results high part in
- // resulting vector.
- unsigned Mull2VectorIdx = VT.getVectorNumElements () / 2;
- SDValue VMullIdx = DAG.getConstant(0, DL, MVT::i64);
- SDValue VMull2Idx = DAG.getConstant(Mull2VectorIdx, DL, MVT::i64);
-
- SDValue VMullV0 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMullIdx);
- SDValue VMullV1 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMullIdx);
-
- SDValue VMull2V0 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMull2Idx);
- SDValue VMull2V1 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMull2Idx);
-
- unsigned MullOpc = Op.getOpcode() == ISD::MULHS ? AArch64ISD::SMULL
- : AArch64ISD::UMULL;
-
- EVT MullVT = ExtractVT.widenIntegerVectorElementType(*DAG.getContext());
- SDValue Mull = DAG.getNode(MullOpc, DL, MullVT, VMullV0, VMullV1);
- SDValue Mull2 = DAG.getNode(MullOpc, DL, MullVT, VMull2V0, VMull2V1);
-
- Mull = DAG.getNode(ISD::BITCAST, DL, VT, Mull);
- Mull2 = DAG.getNode(ISD::BITCAST, DL, VT, Mull2);
-
- return DAG.getNode(AArch64ISD::UZP2, DL, VT, Mull, Mull2);
-}
-
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -2689,9 +2718,19 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
}
- case Intrinsic::aarch64_neon_abs:
- return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
- Op.getOperand(1));
+ case Intrinsic::aarch64_neon_abs: {
+ EVT Ty = Op.getValueType();
+ if (Ty == MVT::i64) {
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
+ Op.getOperand(1));
+ Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
+ } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
+ return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
+ } else {
+ report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
+ }
+ }
case Intrinsic::aarch64_neon_smax:
return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
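
For the i64 case above, the single-lane vector ABS yields the ordinary two's-complement absolute value; a small scalar sketch of that semantics (note INT64_MIN maps to itself, and the arithmetic is done unsigned to avoid overflow):

    #include <cassert>
    #include <cstdint>

    static int64_t abs64(int64_t x) {
      uint64_t ux = (uint64_t)x;
      uint64_t mask = (uint64_t)(x >> 63);   // 0 or all-ones (arithmetic shift)
      return (int64_t)((ux ^ mask) - mask);
    }

    int main() {
      assert(abs64(-5) == 5 && abs64(7) == 7);
      assert(abs64(INT64_MIN) == INT64_MIN);  // no positive counterpart exists
      return 0;
    }
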
@@ -2794,6 +2833,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerSELECT_CC(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT:
+ return LowerBR_JT(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:
@@ -2830,8 +2871,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
+ case ISD::SPONENTRY:
+ return LowerSPONENTRY(Op, DAG);
case ISD::RETURNADDR:
return LowerRETURNADDR(Op, DAG);
+ case ISD::ADDROFRETURNADDR:
+ return LowerADDROFRETURNADDR(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
@@ -2875,9 +2920,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
- case ISD::MULHS:
- case ISD::MULHU:
- return LowerMULH(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::STORE:
@@ -2927,6 +2969,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
+ case CallingConv::AArch64_VectorCall:
+ return CC_AArch64_AAPCS;
}
}
@@ -3113,6 +3157,17 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// We currently pass all varargs at 8-byte alignment.
StackOffset = ((StackOffset + 7) & ~7);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
+
+ if (MFI.hasMustTailInVarArgFunc()) {
+ SmallVector<MVT, 2> RegParmTypes;
+ RegParmTypes.push_back(MVT::i64);
+ RegParmTypes.push_back(MVT::f128);
+ // Compute the set of forwarded registers. The rest are scratch.
+ SmallVectorImpl<ForwardedRegister> &Forwards =
+ FuncInfo->getForwardedMustTailRegParms();
+ CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
+ CC_AArch64_AAPCS);
+ }
}
unsigned StackArgSize = CCInfo.getNextStackOffset();
@@ -3135,6 +3190,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// much is there while considering tail calls (because we can reuse it).
FuncInfo->setBytesInStackArgArea(StackArgSize);
+ if (Subtarget->hasCustomCallingConv())
+ Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
+
return Chain;
}
@@ -3365,6 +3423,10 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+ if (Subtarget->hasCustomCallingConv()) {
+ TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
+ TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
+ }
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
@@ -3566,6 +3628,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
+ if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
+ const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
+ for (const auto &F : Forwards) {
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
+ RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
+ }
+ }
+
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
++i, ++realArgIdx) {
@@ -3758,6 +3828,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
} else
Mask = TRI->getCallPreservedMask(MF, CallConv);
+ if (Subtarget->hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(MF, &Mask);
+
+ if (TRI->isAnyArgRegReserved(MF))
+ TRI->emitReservedArgRegCallError(MF);
+
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3944,13 +4020,21 @@ SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
}
+// (adr sym)
+template <class NodeTy>
+SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
+ unsigned Flags) const {
+ LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
+ return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
+}
+
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
- const AArch64II::TOF TargetFlags =
- (GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
- : AArch64II::MO_NO_FLAG);
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
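
A rough model of the two materialization strategies involved here, assuming the usual 4KiB pages for ADRP+ADDlow and a roughly +/-1MiB PC-relative range for ADR; the helper names are made up for illustration:

    #include <cassert>
    #include <cstdint>

    struct PagePlusLow { uint64_t page, lo12; };

    // ADRP+ADDlow: 4KiB page address plus a low-12-bit offset.
    static PagePlusLow splitForAdrp(uint64_t target) {
      return { target & ~0xFFFull, target & 0xFFFull };
    }

    // ADR: single instruction, 21-bit signed byte offset from PC.
    static bool fitsInAdr(uint64_t pc, uint64_t target) {
      int64_t delta = (int64_t)(target - pc);
      return delta >= -(1 << 20) && delta < (1 << 20);
    }

    int main() {
      PagePlusLow p = splitForAdrp(0x412345);
      assert(p.page == 0x412000 && p.lo12 == 0x345);
      assert(fitsInAdr(0x400000, 0x4FFF00));    // within ~1MiB: ADR works
      assert(!fitsInAdr(0x400000, 0x600000));   // too far for a single ADR
      return 0;
    }
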
@@ -3958,20 +4042,23 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
"unexpected offset in global node");
- // This also catches the large code model case for Darwin.
+ // This also catches the large code model case for Darwin, and the tiny code
+ // model with GOT relocations.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
- return getGOT(GN, DAG, TargetFlags);
+ return getGOT(GN, DAG, OpFlags);
}
SDValue Result;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
- Result = getAddrLarge(GN, DAG, TargetFlags);
+ Result = getAddrLarge(GN, DAG, OpFlags);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ Result = getAddrTiny(GN, DAG, OpFlags);
} else {
- Result = getAddr(GN, DAG, TargetFlags);
+ Result = getAddr(GN, DAG, OpFlags);
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(GN);
- if (GV->hasDLLImportStorageClass())
+ if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
@@ -4036,8 +4123,10 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask = TRI->getTLSCallPreservedMask();
+ if (Subtarget->hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
@@ -4087,13 +4176,15 @@ SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
- assert(Subtarget->useSmallAddressing() &&
- "ELF TLS only supported in small memory model");
+ if (getTargetMachine().getCodeModel() == CodeModel::Large)
+ report_fatal_error("ELF TLS only supported in small memory model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
// maximum TLS size is 4GiB.
// FIXME: add -mtls-size command line option and make it control the 16MiB
// vs. 4GiB code sequence generation.
+ // FIXME: add tiny code model support. We currently generate the same code as
+ // small, which may be larger than needed.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -4264,6 +4355,13 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ bool ProduceNonFlagSettingCondBr =
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+
// Handle f128 first, since lowering it will result in comparing the return
// value of a libcall against zero, which is just what the rest of LowerBR_CC
// is expecting to deal with.
@@ -4306,7 +4404,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// If the RHS of the comparison is zero, we can potentially fold this
// to a specialized branch.
const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
- if (RHSC && RHSC->getZExtValue() == 0) {
+ if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
if (CC == ISD::SETEQ) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
@@ -4349,7 +4447,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
- LHS.getOpcode() != ISD::AND) {
+ LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
@@ -4478,18 +4576,42 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
- if (VT == MVT::i32)
- Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
- Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
+ if (VT == MVT::i32 || VT == MVT::i64) {
+ if (VT == MVT::i32)
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
- SDValue UaddLV = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
- DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
+ SDValue UaddLV = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
- if (VT == MVT::i64)
- UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
- return UaddLV;
+ if (VT == MVT::i64)
+ UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
+ return UaddLV;
+ }
+
+ assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
+ VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
+
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ Val = DAG.getBitcast(VT8Bit, Val);
+ Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
+
+ // Widen the v8i8/v16i8 CTPOP result to VT with repeated pairwise widening adds.
+ unsigned EltSize = 8;
+ unsigned NumElts = VT.is64BitVector() ? 8 : 16;
+ while (EltSize != VT.getScalarSizeInBits()) {
+ EltSize *= 2;
+ NumElts /= 2;
+ MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
+ Val = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
+ }
+
+ return Val;
}
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
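
A scalar C++ sketch of the widening scheme above, using the GCC/Clang __builtin_popcountll builtin to stand in for CNT and a pairwise lane add to stand in for UADDLP:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Add adjacent lanes, producing lanes of twice the width (the UADDLP step).
    static std::vector<uint64_t> pairwiseAdd(const std::vector<uint64_t> &lanes) {
      std::vector<uint64_t> out;
      for (size_t i = 0; i + 1 < lanes.size(); i += 2)
        out.push_back(lanes[i] + lanes[i + 1]);
      return out;
    }

    // Per-byte popcount (CNT), then widen until the element size matches.
    static std::vector<uint64_t> ctpopLanes(std::vector<uint64_t> bytes,
                                            unsigned eltBits) {
      for (auto &b : bytes)
        b = __builtin_popcountll(b);
      for (unsigned width = 8; width < eltBits; width *= 2)
        bytes = pairwiseAdd(bytes);
      return bytes;
    }

    int main() {
      // A v8i8 register viewed as bytes; ask for a v2i32-style result.
      std::vector<uint64_t> bytes = {0xFF, 0x0F, 0x01, 0x00,
                                     0x80, 0x03, 0x07, 0x70};
      auto res = ctpopLanes(bytes, 32);
      assert(res.size() == 2);
      assert(res[0] == 8 + 4 + 1 + 0);   // popcounts of the low four bytes
      assert(res[1] == 1 + 2 + 3 + 3);   // popcounts of the high four bytes
      return 0;
    }
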
@@ -4811,10 +4933,28 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(JT, DAG);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ return getAddrTiny(JT, DAG);
}
return getAddr(JT, DAG);
}
+SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Jump table entries are PC-relative offsets. No additional tweaking
+ // is necessary here. Just get the address of the jump table.
+ SDLoc DL(Op);
+ SDValue JT = Op.getOperand(1);
+ SDValue Entry = Op.getOperand(2);
+ int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
+
+ SDNode *Dest =
+ DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
+ Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
+ return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
+ SDValue(Dest, 0));
+}
+
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
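
An integer-only sketch of the dispatch the BR_JT lowering above produces, assuming jump-table entries are 32-bit offsets relative to the table address (the helper below is illustrative, not the pseudo's exact semantics):

    #include <cassert>
    #include <cstdint>

    static uint64_t jumpTarget(uint64_t tableAddr, const int32_t *entries,
                               uint64_t index) {
      // Branch destination = table address + sign-extended table entry.
      return tableAddr + (int64_t)entries[index];
    }

    int main() {
      const int32_t table[] = {0x40, 0x80, -0x20};   // offsets to three blocks
      uint64_t tableAddr = 0x10000;
      assert(jumpTarget(tableAddr, table, 0) == 0x10040);
      assert(jumpTarget(tableAddr, table, 2) == 0x0FFE0);
      return 0;
    }
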
@@ -4825,6 +4965,8 @@ SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
return getGOT(CP, DAG);
}
return getAddrLarge(CP, DAG);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ return getAddrTiny(CP, DAG);
} else {
return getAddr(CP, DAG);
}
@@ -4836,9 +4978,10 @@ SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(BA, DAG);
- } else {
- return getAddr(BA, DAG);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ return getAddrTiny(BA, DAG);
}
+ return getAddr(BA, DAG);
}
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
@@ -5044,21 +5187,59 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
+SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ EVT VT = getPointerTy(DAG.getDataLayout());
+ SDLoc DL(Op);
+ int FI = MFI.CreateFixedObject(4, 0, false);
+ return DAG.getFrameIndex(FI, VT);
+}
+
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", AArch64::SP)
+ .Case("x1", AArch64::X1)
+ .Case("w1", AArch64::W1)
+ .Case("x2", AArch64::X2)
+ .Case("w2", AArch64::W2)
+ .Case("x3", AArch64::X3)
+ .Case("w3", AArch64::W3)
+ .Case("x4", AArch64::X4)
+ .Case("w4", AArch64::W4)
+ .Case("x5", AArch64::X5)
+ .Case("w5", AArch64::W5)
+ .Case("x6", AArch64::X6)
+ .Case("w6", AArch64::W6)
+ .Case("x7", AArch64::X7)
+ .Case("w7", AArch64::W7)
.Case("x18", AArch64::X18)
.Case("w18", AArch64::W18)
.Case("x20", AArch64::X20)
.Case("w20", AArch64::W20)
.Default(0);
- if (((Reg == AArch64::X18 || Reg == AArch64::W18) &&
- !Subtarget->isX18Reserved()) ||
+ if (((Reg == AArch64::X1 || Reg == AArch64::W1) &&
+ !Subtarget->isXRegisterReserved(1)) ||
+ ((Reg == AArch64::X2 || Reg == AArch64::W2) &&
+ !Subtarget->isXRegisterReserved(2)) ||
+ ((Reg == AArch64::X3 || Reg == AArch64::W3) &&
+ !Subtarget->isXRegisterReserved(3)) ||
+ ((Reg == AArch64::X4 || Reg == AArch64::W4) &&
+ !Subtarget->isXRegisterReserved(4)) ||
+ ((Reg == AArch64::X5 || Reg == AArch64::W5) &&
+ !Subtarget->isXRegisterReserved(5)) ||
+ ((Reg == AArch64::X6 || Reg == AArch64::W6) &&
+ !Subtarget->isXRegisterReserved(6)) ||
+ ((Reg == AArch64::X7 || Reg == AArch64::W7) &&
+ !Subtarget->isXRegisterReserved(7)) ||
+ ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
+ !Subtarget->isXRegisterReserved(18)) ||
((Reg == AArch64::X20 || Reg == AArch64::W20) &&
- !Subtarget->isX20Reserved()))
+ !Subtarget->isXRegisterReserved(20)))
Reg = 0;
if (Reg)
return Reg;
@@ -5066,6 +5247,20 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ StringRef(RegName) + "\"."));
}
+SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ SDValue FrameAddr =
+ DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
+ SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
+
+ return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
+}
+
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -5208,40 +5403,29 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// FIXME: We should be able to handle f128 as well with a clever lowering.
if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
(VT == MVT::f16 && Subtarget->hasFullFP16()))) {
- LLVM_DEBUG(
- dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
+ LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n");
return true;
}
- StringRef FPType;
bool IsLegal = false;
SmallString<128> ImmStrVal;
Imm.toString(ImmStrVal);
- if (VT == MVT::f64) {
- FPType = "f64";
+ if (VT == MVT::f64)
IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
- } else if (VT == MVT::f32) {
- FPType = "f32";
+ else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
- } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) {
- FPType = "f16";
+ else if (VT == MVT::f16 && Subtarget->hasFullFP16())
IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
- }
if (IsLegal) {
- LLVM_DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal
- << "\n");
+ LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString()
+ << " imm value: " << ImmStrVal << "\n");
return true;
}
- if (!FPType.empty())
- LLVM_DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal
- << "\n");
- else
- LLVM_DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal
- << ": unsupported fp type\n");
-
+ LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString()
+ << " imm value: " << ImmStrVal << "\n");
return false;
}
@@ -5453,6 +5637,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
case 'w':
+ if (!Subtarget->hasFPARMv8())
+ break;
if (VT.getSizeInBits() == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
if (VT.getSizeInBits() == 32)
@@ -5465,6 +5651,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
// The instructions that this constraint is designed for can
// only take 128-bit registers so just use that regclass.
case 'x':
+ if (!Subtarget->hasFPARMv8())
+ break;
if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::FPR128_loRegClass);
break;
@@ -5500,6 +5688,11 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
}
}
+ if (Res.second && !Subtarget->hasFPARMv8() &&
+ !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
+ !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
+ return std::make_pair(0U, nullptr);
+
return Res;
}
@@ -6921,10 +7114,19 @@ static SDValue NormalizeBuildVector(SDValue Op,
SmallVector<SDValue, 16> Ops;
for (SDValue Lane : Op->ops()) {
+ // For integer vectors, type legalization would have promoted the
+ // operands already. Otherwise, if Op is a floating-point splat
+ // (with operands cast to integers), then the only possibilities
+ // are constants and UNDEFs.
if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
+ } else if (Lane.getNode()->isUndef()) {
+ Lane = DAG.getUNDEF(MVT::i32);
+ } else {
+ assert(Lane.getValueType() == MVT::i32 &&
+ "Unexpected BUILD_VECTOR operand type");
}
Ops.push_back(Lane);
}
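
A small sketch of the constant-lane case above: keep only the element's low bits of the constant and carry the lane as an i32-sized value (the names and the 32-bit-or-smaller element assumption are for illustration only):

    #include <cassert>
    #include <cstdint>

    static uint32_t normalizeLane(uint64_t cst, unsigned eltBits) {
      // Assumes eltBits <= 32, matching the promoted integer lanes.
      uint64_t lowBits = cst & ((1ull << eltBits) - 1);
      return (uint32_t)lowBits;
    }

    int main() {
      assert(normalizeLane(0xFFFFFFFFFFFF1234ull, 16) == 0x1234);  // v8i16 lane
      assert(normalizeLane(0x1FFu, 8) == 0xFF);                    // v16i8 lane
      return 0;
    }
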
@@ -7050,7 +7252,10 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getUNDEF(VT);
}
- if (isOnlyLowElement) {
+ // Convert BUILD_VECTOR where all elements but the lowest are undef into
+ // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
+ // as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
+ if (isOnlyLowElement && !(NumElts == 1 && isa<ConstantSDNode>(Value))) {
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
"SCALAR_TO_VECTOR node\n");
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
@@ -7632,7 +7837,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
if (ShouldInvert)
- return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
+ Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
return Cmp;
}
@@ -7718,8 +7923,10 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getWindowsStackProbePreservedMask();
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
+ if (Subtarget->hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
@@ -7827,7 +8034,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
- for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
@@ -7891,6 +8098,10 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
+ // TODO: This may be worth removing. Check regression tests for diffs.
+ if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
+ return false;
+
// If we're reducing the load width in order to avoid having to use an extra
// instruction to do extension then it's probably a good idea.
if (ExtTy != ISD::NON_EXTLOAD)
@@ -8348,27 +8559,30 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- // Don't use AdvSIMD to implement 16-byte memset. It would have taken one
- // instruction to materialize the v2i64 zero and one store (with restrictive
- // addressing mode). Just do two i64 store of zero-registers.
- bool Fast;
const Function &F = MF.getFunction();
- if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
- !F.hasFnAttribute(Attribute::NoImplicitFloat) &&
- (memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
- return MVT::f128;
+ bool CanImplicitFloat = !F.hasFnAttribute(Attribute::NoImplicitFloat);
+ bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
+ bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
+ // Only use AdvSIMD for memsets of 32 bytes and above. For smaller memsets,
+ // it would take one instruction to materialize the v2i64 zero plus one store
+ // (with a restrictive addressing mode), so just do i64 stores instead.
+ bool IsSmallMemset = IsMemset && Size < 32;
+ auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
+ if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
+ return true;
+ bool Fast;
+ return allowsMisalignedMemoryAccesses(VT, 0, 1, &Fast) && Fast;
+ };
- if (Size >= 8 &&
- (memOpAlign(SrcAlign, DstAlign, 8) ||
- (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+ if (CanUseNEON && IsMemset && !IsSmallMemset &&
+ AlignmentIsAcceptable(MVT::v2i64, 16))
+ return MVT::v2i64;
+ if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+ return MVT::f128;
+ if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
return MVT::i64;
-
- if (Size >= 4 &&
- (memOpAlign(SrcAlign, DstAlign, 4) ||
- (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+ if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
return MVT::i32;
-
return MVT::Other;
}
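
A standalone restatement of this heuristic, with a plain struct standing in for the query and the returned store width in bytes standing in for the chosen MVT; a sketch of the decision logic, not the actual hook:

    #include <cassert>
    #include <cstdint>

    struct MemOpQuery {
      uint64_t Size;
      unsigned SrcAlign, DstAlign;        // 0 means "don't care"
      bool IsMemset, CanUseNEON, CanUseFP, MisalignedFast;
    };

    static unsigned chooseStoreBytes(const MemOpQuery &Q) {
      auto aligned = [&](unsigned A) {
        return Q.MisalignedFast ||
               ((Q.DstAlign == 0 || Q.DstAlign >= A) &&
                (Q.SrcAlign == 0 || Q.SrcAlign >= A));
      };
      bool smallMemset = Q.IsMemset && Q.Size < 32;
      if (Q.CanUseNEON && Q.IsMemset && !smallMemset && aligned(16))
        return 16;                        // v2i64 stores
      if (Q.CanUseFP && !smallMemset && aligned(16))
        return 16;                        // f128 stores
      if (Q.Size >= 8 && aligned(8))
        return 8;                         // i64 stores
      if (Q.Size >= 4 && aligned(4))
        return 4;                         // i32 stores
      return 1;                           // fall back to byte-wise
    }

    int main() {
      // A 16-byte memset stays on 8-byte integer stores ...
      assert(chooseStoreBytes({16, 16, 16, true, true, true, false}) == 8);
      // ... while a 64-byte aligned memset may use 16-byte NEON stores.
      assert(chooseStoreBytes({64, 16, 16, true, true, true, false}) == 16);
      return 0;
    }
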
@@ -8496,7 +8710,9 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
}
bool
-AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const {
+AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const {
+ N = N->getOperand(0).getNode();
EVT VT = N->getValueType(0);
// If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
// it with shift to let it be lowered to UBFX.
@@ -9717,10 +9933,10 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
- return DAG.getNode(ISD::FMAXNAN, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmin:
- return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmaxnm:
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
@@ -9849,6 +10065,7 @@ static SDValue performExtendCombine(SDNode *N,
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
SDValue SplatVal, unsigned NumVecElts) {
+ assert(!St.isTruncatingStore() && "cannot split truncating vector store");
unsigned OrigAlignment = St.getAlignment();
unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
@@ -9923,6 +10140,11 @@ static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
if (!StVal.hasOneUse())
return SDValue();
+ // If the store is truncating then it's going down to i16 or smaller, which
+ // means it can be implemented in a single store anyway.
+ if (St.isTruncatingStore())
+ return SDValue();
+
// If the immediate offset of the address operand is too large for the stp
// instruction, then bail out.
if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
@@ -9973,6 +10195,11 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
+ // If the store is truncating then it's going down to i16 or smaller, which
+ // means it can be implemented in a single store anyway.
+ if (St.isTruncatingStore())
+ return SDValue();
+
// Check that this is a splat.
// Make sure that each of the relevant vector element locations are inserted
// to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
@@ -10129,15 +10356,6 @@ static SDValue performPostLD1Combine(SDNode *N,
|| UI.getUse().getResNo() != Addr.getResNo())
continue;
- // Check that the add is independent of the load. Otherwise, folding it
- // would create a cycle.
- if (User->isPredecessorOf(LD) || LD->isPredecessorOf(User))
- continue;
- // Also check that add is not used in the vector operand. This would also
- // create a cycle.
- if (User->isPredecessorOf(Vector.getNode()))
- continue;
-
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
@@ -10148,11 +10366,16 @@ static SDValue performPostLD1Combine(SDNode *N,
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
- // Finally, check that the vector doesn't depend on the load.
- // Again, this would create a cycle.
- // The load depending on the vector is fine, as that's the case for the
- // LD1*post we'll eventually generate anyway.
- if (LoadSDN->isPredecessorOf(Vector.getNode()))
+ // To avoid constructing a cycle, make sure that neither the load nor the add
+ // is a predecessor of the other or of the Vector operand.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(N);
+ Worklist.push_back(User);
+ Worklist.push_back(LD);
+ Worklist.push_back(Vector.getNode());
+ if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
SmallVector<SDValue, 8> Ops;
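
A generic sketch of the reachability test being introduced here: a node is a predecessor of another if it can be reached by walking operand edges, and a fold is rejected when it would make two nodes predecessors of each other:

    #include <cassert>
    #include <unordered_set>
    #include <vector>

    struct Node {
      std::vector<const Node *> operands;
    };

    // True if Pred is reachable from N by following operand edges.
    static bool isPredecessor(const Node *Pred, const Node *N) {
      std::unordered_set<const Node *> visited;
      std::vector<const Node *> worklist{N};
      while (!worklist.empty()) {
        const Node *cur = worklist.back();
        worklist.pop_back();
        if (!visited.insert(cur).second)
          continue;                        // already seen
        if (cur == Pred)
          return true;
        for (const Node *op : cur->operands)
          worklist.push_back(op);
      }
      return false;
    }

    int main() {
      Node a, b, c;
      b.operands = {&a};                   // b uses a
      c.operands = {&b};                   // c uses b
      assert(isPredecessor(&a, &c));       // a is reachable from c
      assert(!isPredecessor(&c, &a));      // but not the other way round
      // Making c an operand of a would therefore create a cycle.
      return 0;
    }
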
@@ -10238,7 +10461,13 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
- if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Addr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(User);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
@@ -10608,6 +10837,13 @@ SDValue performCONDCombine(SDNode *N,
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return SDValue();
+
if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
N = NV.getNode();
SDValue Chain = N->getOperand(0);
@@ -10865,9 +11101,9 @@ static SDValue performNVCASTCombine(SDNode *N) {
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget,
const TargetMachine &TM) {
- auto *GN = dyn_cast<GlobalAddressSDNode>(N);
- if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
- AArch64II::MO_NO_FLAG)
+ auto *GN = cast<GlobalAddressSDNode>(N);
+ if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
+ AArch64II::MO_NO_FLAG)
return SDValue();
uint64_t MinOffset = -1ull;
@@ -10999,6 +11235,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
default:
break;
}
+ break;
case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}
@@ -11196,12 +11433,10 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
N->getOperand(0), // Chain in
};
- MachineFunction &MF = DAG.getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
unsigned Opcode;
- switch (MemOp[0]->getOrdering()) {
+ switch (MemOp->getOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CASPX;
break;
@@ -11221,7 +11456,7 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
MachineSDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
- CmpSwap->setMemRefs(MemOp, MemOp + 1);
+ DAG.setNodeMemRefs(CmpSwap, {MemOp});
unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
if (DAG.getDataLayout().isBigEndian())
@@ -11242,10 +11477,8 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
AArch64::CMP_SWAP_128, SDLoc(N),
DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other), Ops);
- MachineFunction &MF = DAG.getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
Results.push_back(SDValue(CmpSwap, 0));
Results.push_back(SDValue(CmpSwap, 1));
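
For context, a 128-bit compare-and-swap at the source level, using the GCC/Clang __int128 extension and __atomic builtins (linking may need libatomic; whether CASP, an LDXP/STXP loop, or a library call is emitted depends on target features and flags):

    #include <cassert>

    static bool cas128(__int128 *ptr, __int128 &expected, __int128 desired) {
      return __atomic_compare_exchange_n(ptr, &expected, desired,
                                         /*weak=*/false,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }

    int main() {
      __int128 v = 1, expected = 1;
      assert(cas128(&v, expected, 2) && v == 2);   // succeeds, stores 2
      expected = 1;                                // stale expectation
      assert(!cas128(&v, expected, 3) && expected == 2);  // fails, refreshes
      return 0;
    }
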
@@ -11310,12 +11543,11 @@ unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
}
TargetLoweringBase::LegalizeTypeAction
-AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
- MVT SVT = VT.getSimpleVT();
+AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
// During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
// v4i16, v2i32 instead of to promote.
- if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
- || SVT == MVT::v1f32)
+ if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
+ VT == MVT::v1f32)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
@@ -11349,16 +11581,20 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
}
-bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
+TargetLowering::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// If subtarget has LSE, leave cmpxchg intact for codegen.
- if (Subtarget->hasLSE()) return false;
+ if (Subtarget->hasLSE())
+ return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- return getTargetMachine().getOptLevel() != 0;
+ if (getTargetMachine().getOptLevel() == 0)
+ return AtomicExpansionKind::None;
+ return AtomicExpansionKind::LLSC;
}
Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
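
The LL/SC expansion produces a load/compare/conditional-store retry loop; std::atomic's compare_exchange_weak has the same shape and serves as a portable stand-in here (a sketch of the loop structure, not the generated code):

    #include <atomic>
    #include <cassert>

    static int fetchAddLLSC(std::atomic<int> &a, int delta) {
      int old = a.load(std::memory_order_relaxed);
      // compare_exchange_weak may fail spuriously, exactly like a failed STXR,
      // in which case 'old' is refreshed and the loop retries.
      while (!a.compare_exchange_weak(old, old + delta,
                                      std::memory_order_seq_cst,
                                      std::memory_order_relaxed)) {
      }
      return old;
    }

    int main() {
      std::atomic<int> a{40};
      assert(fetchAddLLSC(a, 2) == 40 && a.load() == 42);
      return 0;
    }
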
@@ -11468,6 +11704,39 @@ Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
return TargetLowering::getIRStackGuard(IRB);
}
+void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
+ // The MSVC CRT provides functionality for stack protection.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
+ // MSVC CRT has a global variable holding security cookie.
+ M.getOrInsertGlobal("__security_cookie",
+ Type::getInt8PtrTy(M.getContext()));
+
+ // MSVC CRT has a function to validate security cookie.
+ auto *SecurityCheckCookie = cast<Function>(
+ M.getOrInsertFunction("__security_check_cookie",
+ Type::getVoidTy(M.getContext()),
+ Type::getInt8PtrTy(M.getContext())));
+ SecurityCheckCookie->setCallingConv(CallingConv::Win64);
+ SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
+ return;
+ }
+ TargetLowering::insertSSPDeclarations(M);
+}
+
+Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
+ // MSVC CRT has a global variable holding security cookie.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+ return M.getGlobalVariable("__security_cookie");
+ return TargetLowering::getSDagStackGuard(M);
+}
+
+Value *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
+ // MSVC CRT has a function to validate security cookie.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+ return M.getFunction("__security_check_cookie");
+ return TargetLowering::getSSPStackGuardCheck(M);
+}
+
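+
A self-contained sketch of the MSVC-style check these hooks wire up; the real __security_cookie and __security_check_cookie live in the MSVC CRT, so the lowercase stand-ins below exist only so the sketch links anywhere:

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    static uintptr_t security_cookie = 0x2B992DDFu;   // the CRT randomizes this

    static void security_check_cookie(uintptr_t cookie) {
      if (cookie != security_cookie) {
        std::fprintf(stderr, "stack smashing detected\n");
        std::abort();
      }
    }

    static void protectedFunction() {
      uintptr_t stackGuard = security_cookie;   // prologue: spill the cookie
      char buffer[16];
      std::snprintf(buffer, sizeof buffer, "hello");   // ordinary work
      security_check_cookie(stackGuard);        // epilogue: validate on return
    }

    int main() {
      protectedFunction();
      return 0;
    }
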
Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
@@ -11572,3 +11841,8 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
MF.getFrameInfo().computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
+
+// Unlike X86, we let frame lowering assign offsets to all catch objects.
+bool AArch64TargetLowering::needsFixedCatchObjects() const {
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index d783c8a6048c..ffc4cc3ef534 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -35,6 +35,7 @@ enum NodeType : unsigned {
// offset of a variable into X0, using the TLSDesc model.
TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
+ ADR, // ADR: PC-relative address of a target operand (tiny code model).
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
// Offset Table, TLS record).
@@ -301,6 +302,12 @@ public:
MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
@@ -363,7 +370,8 @@ public:
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
- bool isDesirableToCommuteWithShift(const SDNode *N) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
@@ -388,16 +396,21 @@ public:
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
- bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const override;
+ getPreferredVectorAction(MVT VT) const override;
/// If the target has a standard location for the stack protector cookie,
/// returns the address of that location. Otherwise, returns nullptr.
Value *getIRStackGuard(IRBuilder<> &IRB) const override;
+ void insertSSPDeclarations(Module &M) const override;
+ Value *getSDagStackGuard(const Module &M) const override;
+ Value *getSSPStackGuardCheck(const Module &M) const override;
+
/// If the target has a standard location for the unsafe stack pointer,
/// returns the address of that location. Otherwise, returns nullptr.
Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
@@ -514,6 +527,8 @@ public:
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
CallingConv::ID CallConv,
bool isVarArg) const override;
+ /// Used for exception handling on Win64.
+ bool needsFixedCatchObjects() const override;
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -586,6 +601,8 @@ private:
SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
template <class NodeTy>
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
+ template <class NodeTy>
+ SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -602,6 +619,7 @@ private:
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
@@ -611,6 +629,7 @@ private:
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 7caf32dbde2a..9061ed4f9f54 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -263,6 +263,14 @@ class SImmOperand<int width> : AsmOperandClass {
let PredicateMethod = "isSImm<" # width # ">";
}
+
+class AsmImmRange<int Low, int High> : AsmOperandClass {
+ let Name = "Imm" # Low # "_" # High;
+ let DiagnosticType = "InvalidImm" # Low # "_" # High;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImmInRange<" # Low # "," # High # ">";
+}
+
// Authenticated loads for v8.3 can have scaled 10-bit immediate offsets.
def SImm10s8Operand : SImmScaledMemoryIndexed<10, 8>;
def simm10Scaled : Operand<i64> {
@@ -271,6 +279,12 @@ def simm10Scaled : Operand<i64> {
let PrintMethod = "printImmScale<8>";
}
+def simm9s16 : Operand<i64> {
+ let ParserMatchClass = SImmScaledMemoryIndexed<9, 16>;
+ let DecoderMethod = "DecodeSImm<9>";
+ let PrintMethod = "printImmScale<16>";
+}
+
// uimm6 predicate - True if the immediate is in the range [0, 63].
def UImm6Operand : AsmOperandClass {
let Name = "UImm6";
@@ -281,6 +295,10 @@ def uimm6 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
let ParserMatchClass = UImm6Operand;
}
+def uimm16 : Operand<i16>, ImmLeaf<i16, [{return Imm >= 0 && Imm < 65536;}]>{
+ let ParserMatchClass = AsmImmRange<0, 65535>;
+}
+
def SImm9Operand : SImmOperand<9>;
def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> {
let ParserMatchClass = SImm9Operand;
@@ -366,6 +384,7 @@ def UImm6s1Operand : UImmScaledMemoryIndexed<6, 1>;
def UImm6s2Operand : UImmScaledMemoryIndexed<6, 2>;
def UImm6s4Operand : UImmScaledMemoryIndexed<6, 4>;
def UImm6s8Operand : UImmScaledMemoryIndexed<6, 8>;
+def UImm6s16Operand : UImmScaledMemoryIndexed<6, 16>;
def uimm6s1 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
let ParserMatchClass = UImm6s1Operand;
@@ -385,6 +404,11 @@ def uimm6s8 : Operand<i64>, ImmLeaf<i64,
let PrintMethod = "printImmScale<8>";
let ParserMatchClass = UImm6s8Operand;
}
+def uimm6s16 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >= 0 && Imm < (64*16) && ((Imm % 16) == 0); }]> {
+ let PrintMethod = "printImmScale<16>";
+ let ParserMatchClass = UImm6s16Operand;
+}
// simm6sN predicate - True if the immediate is a multiple of N in the range
// [-32 * N, 31 * N].
@@ -435,13 +459,6 @@ def simm4s16 : Operand<i64>, ImmLeaf<i64,
let DecoderMethod = "DecodeSImm<4>";
}
-class AsmImmRange<int Low, int High> : AsmOperandClass {
- let Name = "Imm" # Low # "_" # High;
- let DiagnosticType = "InvalidImm" # Low # "_" # High;
- let RenderMethod = "addImmOperands";
- let PredicateMethod = "isImmInRange<" # Low # "," # High # ">";
-}
-
def Imm1_8Operand : AsmImmRange<1, 8>;
def Imm1_16Operand : AsmImmRange<1, 16>;
def Imm1_32Operand : AsmImmRange<1, 32>;
@@ -696,11 +713,10 @@ def logical_imm64_not : Operand<i64> {
}
// imm0_65535 predicate - True if the immediate is in the range [0,65535].
-def Imm0_65535Operand : AsmImmRange<0, 65535>;
def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 65536;
}]> {
- let ParserMatchClass = Imm0_65535Operand;
+ let ParserMatchClass = AsmImmRange<0, 65535>;
let PrintMethod = "printImmHex";
}
@@ -1149,6 +1165,21 @@ def psbhint_op : Operand<i32> {
}];
}
+def BTIHintOperand : AsmOperandClass {
+ let Name = "BTIHint";
+ let ParserMethod = "tryParseBTIHint";
+}
+def btihint_op : Operand<i32> {
+ let ParserMatchClass = BTIHintOperand;
+ let PrintMethod = "printBTIHintOp";
+ let MCOperandPredicate = [{
+ // "bti" is an alias to "hint" only for certain values of CRm:Op2 fields.
+ if (!MCOp.isImm())
+ return false;
+ return AArch64BTIHint::lookupBTIByEncoding((MCOp.getImm() ^ 32) >> 1) != nullptr;
+ }];
+}
+
class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
bits<16> systemreg;
@@ -1173,16 +1204,23 @@ def pstatefield4_op : Operand<i32> {
let PrintMethod = "printSystemPStateField";
}
+// Instructions to modify PSTATE, no input reg
let Defs = [NZCV] in
+class PstateWriteSimple<dag iops, string asm, string operands>
+ : SimpleSystemI<0, iops, asm, operands> {
+
+ let Inst{20-19} = 0b00;
+ let Inst{15-12} = 0b0100;
+}
+
class MSRpstateImm0_15
- : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm),
- "msr", "\t$pstatefield, $imm">,
+ : PstateWriteSimple<(ins pstatefield4_op:$pstatefield, imm0_15:$imm), "msr",
+ "\t$pstatefield, $imm">,
Sched<[WriteSys]> {
+
bits<6> pstatefield;
bits<4> imm;
- let Inst{20-19} = 0b00;
let Inst{18-16} = pstatefield{5-3};
- let Inst{15-12} = 0b0100;
let Inst{11-8} = imm;
let Inst{7-5} = pstatefield{2-0};
@@ -1201,16 +1239,15 @@ def pstatefield1_op : Operand<i32> {
let PrintMethod = "printSystemPStateField";
}
-let Defs = [NZCV] in
class MSRpstateImm0_1
- : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm),
- "msr", "\t$pstatefield, $imm">,
+ : PstateWriteSimple<(ins pstatefield1_op:$pstatefield, imm0_1:$imm), "msr",
+ "\t$pstatefield, $imm">,
Sched<[WriteSys]> {
+
bits<6> pstatefield;
bit imm;
- let Inst{20-19} = 0b00;
let Inst{18-16} = pstatefield{5-3};
- let Inst{15-9} = 0b0100000;
+ let Inst{11-9} = 0b000;
let Inst{8} = imm;
let Inst{7-5} = pstatefield{2-0};
@@ -1719,10 +1756,12 @@ multiclass AddSubCarry<bit isSub, string asm, string asm_setflags,
}
class BaseTwoOperand<bits<4> opc, RegisterClass regtype, string asm,
- SDPatternOperator OpNode>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ SDPatternOperator OpNode,
+ RegisterClass in1regtype = regtype,
+ RegisterClass in2regtype = regtype>
+ : I<(outs regtype:$Rd), (ins in1regtype:$Rn, in2regtype:$Rm),
asm, "\t$Rd, $Rn, $Rm", "",
- [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> {
+ [(set regtype:$Rd, (OpNode in1regtype:$Rn, in2regtype:$Rm))]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -1902,7 +1941,7 @@ class ADRI<bit page, string asm, Operand adr, list<dag> pattern>
//---
def movimm32_imm : Operand<i32> {
- let ParserMatchClass = Imm0_65535Operand;
+ let ParserMatchClass = AsmImmRange<0, 65535>;
let EncoderMethod = "getMoveWideImmOpValue";
let PrintMethod = "printImm";
}
@@ -1977,23 +2016,29 @@ multiclass InsertImmediate<bits<2> opc, string asm> {
//---
class BaseAddSubImm<bit isSub, bit setFlags, RegisterClass dstRegtype,
- RegisterClass srcRegtype, addsub_shifted_imm immtype,
- string asm, SDPatternOperator OpNode>
- : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm),
- asm, "\t$Rd, $Rn, $imm", "",
- [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>,
- Sched<[WriteI, ReadI]> {
+ string asm_inst, string asm_ops,
+ dag inputs, dag pattern>
+ : I<(outs dstRegtype:$Rd), inputs, asm_inst, asm_ops, "", [pattern]>,
+ Sched<[WriteI, ReadI]> {
bits<5> Rd;
bits<5> Rn;
- bits<14> imm;
let Inst{30} = isSub;
let Inst{29} = setFlags;
let Inst{28-24} = 0b10001;
- let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12
- let Inst{21-10} = imm{11-0};
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
- let DecoderMethod = "DecodeBaseAddSubImm";
+}
+
+class AddSubImmShift<bit isSub, bit setFlags, RegisterClass dstRegtype,
+ RegisterClass srcRegtype, addsub_shifted_imm immtype,
+ string asm_inst, SDPatternOperator OpNode>
+ : BaseAddSubImm<isSub, setFlags, dstRegtype, asm_inst, "\t$Rd, $Rn, $imm",
+ (ins srcRegtype:$Rn, immtype:$imm),
+ (set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))> {
+ bits<14> imm;
+ let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12
+ let Inst{21-10} = imm{11-0};
+ let DecoderMethod = "DecodeAddSubImmShift";
}
class BaseAddSubRegPseudo<RegisterClass regtype,
@@ -2097,12 +2142,12 @@ multiclass AddSub<bit isSub, string mnemonic, string alias,
// We used to match the register variant before the immediate when the
// register argument could be implicitly zero-extended.
let AddedComplexity = 6 in
- def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
+ def Wri : AddSubImmShift<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
let Inst{31} = 0;
}
let AddedComplexity = 6 in
- def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
+ def Xri : AddSubImmShift<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
mnemonic, OpNode> {
let Inst{31} = 1;
}
@@ -2173,11 +2218,11 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
string alias, string cmpAlias> {
let isCompare = 1, Defs = [NZCV] in {
// Add/Subtract immediate
- def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
+ def Wri : AddSubImmShift<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
let Inst{31} = 0;
}
- def Xri : BaseAddSubImm<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64,
+ def Xri : AddSubImmShift<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64,
mnemonic, OpNode> {
let Inst{31} = 1;
}
@@ -2271,6 +2316,27 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
GPR64, GPR64sponly, GPR64, 24>; // UXTX #0
}
+class AddSubG<bit isSub, string asm_inst, SDPatternOperator OpNode>
+ : BaseAddSubImm<
+ isSub, 0, GPR64sp, asm_inst, "\t$Rd, $Rn, $imm6, $imm4",
+ (ins GPR64sp:$Rn, uimm6s16:$imm6, imm0_15:$imm4),
+ (set GPR64sp:$Rd, (OpNode GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4))> {
+ bits<6> imm6;
+ bits<4> imm4;
+ let Inst{31} = 1;
+ let Inst{23-22} = 0b10;
+ let Inst{21-16} = imm6;
+ let Inst{15-14} = 0b00;
+ let Inst{13-10} = imm4;
+ let Unpredictable{15-14} = 0b11;
+}
+
+class SUBP<bit setsFlags, string asm_instr, SDPatternOperator OpNode>
+ : BaseTwoOperand<0b0000, GPR64, asm_instr, null_frag, GPR64sp, GPR64sp> {
+ let Inst{31} = 1;
+ let Inst{29} = setsFlags;
+}
+
//---
// Extract
//---
@@ -2853,10 +2919,10 @@ def am_ldrlit : Operand<iPTR> {
let OperandType = "OPERAND_PCREL";
}
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-class LoadLiteral<bits<2> opc, bit V, RegisterOperand regtype, string asm>
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0, AddedComplexity = 20 in
+class LoadLiteral<bits<2> opc, bit V, RegisterOperand regtype, string asm, list<dag> pat>
: I<(outs regtype:$Rt), (ins am_ldrlit:$label),
- asm, "\t$Rt, $label", "", []>,
+ asm, "\t$Rt, $label", "", pat>,
Sched<[WriteLD]> {
bits<5> Rt;
bits<19> label;
@@ -3932,6 +3998,78 @@ class StoreExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
let Constraints = "@earlyclobber $Ws";
}
+// Armv8.5-A Memory Tagging Extension
+class BaseMemTag<bits<2> opc1, bits<2> opc2, string asm_insn,
+ string asm_opnds, string cstr, dag oops, dag iops>
+ : I<oops, iops, asm_insn, asm_opnds, cstr, []>,
+ Sched<[]> {
+ bits<5> Rn;
+
+ let Inst{31-24} = 0b11011001;
+ let Inst{23-22} = opc1;
+ let Inst{21} = 1;
+ // Inst{20-12} defined by subclass
+ let Inst{11-10} = opc2;
+ let Inst{9-5} = Rn;
+ // Inst{4-0} defined by subclass
+}
+
+class MemTagVector<bit Load, string asm_insn, string asm_opnds,
+ dag oops, dag iops>
+ : BaseMemTag<{0b1, Load}, 0b00, asm_insn, asm_opnds,
+ "$Rn = $wback,@earlyclobber $wback", oops, iops> {
+ bits<5> Rt;
+
+ let Inst{20-12} = 0b000000000;
+ let Inst{4-0} = Rt;
+
+ let mayLoad = Load;
+}
+
+class MemTagLoad<string asm_insn, string asm_opnds>
+ : BaseMemTag<0b01, 0b00, asm_insn, asm_opnds, "", (outs GPR64:$Rt),
+ (ins GPR64sp:$Rn, simm9s16:$offset)> {
+ bits<5> Rt;
+ bits<9> offset;
+
+ let Inst{20-12} = offset;
+ let Inst{4-0} = Rt;
+
+ let mayLoad = 1;
+}
+
+class BaseMemTagStore<bits<2> opc1, bits<2> opc2, string asm_insn,
+ string asm_opnds, string cstr, dag oops, dag iops>
+ : BaseMemTag<opc1, opc2, asm_insn, asm_opnds, cstr, oops, iops> {
+ bits<5> Rt;
+ bits<9> offset;
+
+ let Inst{20-12} = offset;
+ let Inst{4-0} = 0b11111;
+ let Unpredictable{4-0} = 0b11111;
+
+ let mayStore = 1;
+}
+
+multiclass MemTagStore<bits<2> opc1, string insn> {
+ def Offset :
+ BaseMemTagStore<opc1, 0b10, insn, "\t[$Rn, $offset]", "",
+ (outs), (ins GPR64sp:$Rn, simm9s16:$offset)>;
+ def PreIndex :
+ BaseMemTagStore<opc1, 0b11, insn, "\t[$Rn, $offset]!",
+ "$Rn = $wback,@earlyclobber $wback",
+ (outs GPR64sp:$wback),
+ (ins GPR64sp:$Rn, simm9s16:$offset)>;
+ def PostIndex :
+ BaseMemTagStore<opc1, 0b01, insn, "\t[$Rn], $offset",
+ "$Rn = $wback,@earlyclobber $wback",
+ (outs GPR64sp:$wback),
+ (ins GPR64sp:$Rn, simm9s16:$offset)>;
+
+ def : InstAlias<insn # "\t[$Rn]",
+ (!cast<Instruction>(NAME # "Offset") GPR64sp:$Rn, 0)>;
+}
+
//---
// Exception generation
//---
@@ -3948,6 +4086,19 @@ class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
let Inst{1-0} = ll;
}
+//---
+// UDF : Permanently UNDEFINED instructions. Format: Opc = 0x0000, 16 bit imm.
+//---
+let hasSideEffects = 1, isTrap = 1, mayLoad = 0, mayStore = 0 in {
+class UDFType<bits<16> opc, string asm>
+ : I<(outs), (ins uimm16:$imm),
+ asm, "\t$imm", "", []>,
+ Sched<[]> {
+ bits<16> imm;
+ let Inst{31-16} = opc;
+ let Inst{15-0} = imm;
+}
+}
let Predicates = [HasFPARMv8] in {
//---
@@ -4395,7 +4546,7 @@ multiclass FPConversion<string asm> {
//---
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
+class BaseSingleOperandFPData<bits<6> opcode, RegisterClass regtype,
ValueType vt, string asm, SDPatternOperator node>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
[(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>,
@@ -4403,8 +4554,8 @@ class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
bits<5> Rd;
bits<5> Rn;
let Inst{31-24} = 0b00011110;
- let Inst{21-19} = 0b100;
- let Inst{18-15} = opcode;
+ let Inst{21} = 0b1;
+ let Inst{20-15} = opcode;
let Inst{14-10} = 0b10000;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
@@ -4412,20 +4563,37 @@ class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
multiclass SingleOperandFPData<bits<4> opcode, string asm,
SDPatternOperator node = null_frag> {
- def Hr : BaseSingleOperandFPData<opcode, FPR16, f16, asm, node> {
+
+ def Hr : BaseSingleOperandFPData<{0b00,opcode}, FPR16, f16, asm, node> {
let Inst{23-22} = 0b11; // 16-bit size flag
let Predicates = [HasFullFP16];
}
- def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
+ def Sr : BaseSingleOperandFPData<{0b00,opcode}, FPR32, f32, asm, node> {
let Inst{23-22} = 0b00; // 32-bit size flag
}
- def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
+ def Dr : BaseSingleOperandFPData<{0b00,opcode}, FPR64, f64, asm, node> {
let Inst{23-22} = 0b01; // 64-bit size flag
}
}
+multiclass SingleOperandFPNo16<bits<6> opcode, string asm,
+ SDPatternOperator node = null_frag>{
+
+ def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
+ let Inst{23-22} = 0b00; // 32-bit registers
+ }
+
+ def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
+ let Inst{23-22} = 0b01; // 64-bit registers
+ }
+}
+
+// FRInt[32|64][Z|N] instructions
+multiclass FRIntNNT<bits<2> opcode, string asm, SDPatternOperator node = null_frag> :
+ SingleOperandFPNo16<{0b0100,opcode}, asm, node>;
+
//---
// Two operand floating point data processing
//---
@@ -4790,25 +4958,6 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
-class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
- string kind2, RegisterOperand RegType,
- ValueType AccumType, ValueType InputType,
- SDPatternOperator OpNode> :
- BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1,
- [(set (AccumType RegType:$dst),
- (OpNode (AccumType RegType:$Rd),
- (InputType RegType:$Rn),
- (InputType RegType:$Rm)))]> {
- let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
-}
-
-multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64,
- v2i32, v8i8, OpNode>;
- def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128,
- v4i32, v16i8, OpNode>;
-}
-
// All operand sizes distinguished in the encoding.
multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -5049,6 +5198,51 @@ multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
V128:$LHS, V128:$MHS, V128:$RHS)>;
}
+// ARMv8.2-A Dot Product Instructions (Vector): These instructions extract
+// bytes from S-sized elements.
+class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
+ string kind2, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1,
+ [(set (AccumType RegType:$dst),
+ (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType RegType:$Rm)))]> {
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+}
+
+multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64,
+ v2i32, v8i8, OpNode>;
+ def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128,
+ v4i32, v16i8, OpNode>;
+}
+
+// ARMv8.2-A Fused Multiply Add-Long Instructions (Vector): These instructions
+// select inputs from 4H vectors and accumulate outputs to a 2S vector (or from
+// 8H to 4S, when Q=1).
+class BaseSIMDThreeSameVectorFML<bit Q, bit U, bit b13, bits<3> size, string asm, string kind1,
+ string kind2, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDThreeSameVectorTied<Q, U, size, 0b11101, RegType, asm, kind1,
+ [(set (AccumType RegType:$dst),
+ (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType RegType:$Rm)))]> {
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+ let Inst{13} = b13;
+}
+
+multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
+ SDPatternOperator OpNode> {
+ def v4f16 : BaseSIMDThreeSameVectorFML<0, U, b13, size, asm, ".2s", ".2h", V64,
+ v2f32, v4f16, OpNode>;
+ def v8f16 : BaseSIMDThreeSameVectorFML<1, U, b13, size, asm, ".4s", ".4h", V128,
+ v4f32, v8f16, OpNode>;
+}
+
//----------------------------------------------------------------------------
// AdvSIMD two register vector instructions.
@@ -5293,7 +5487,7 @@ multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
[(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
}
-// Supports only S and D element sizes, uses high bit of the size field
+// Supports H, S and D element sizes, uses high bit of the size field
// as an extra opcode bit.
multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -5316,6 +5510,25 @@ multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
}
+// Supports only S and D element sizes
+multiclass SIMDTwoVectorSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+
+ def v2f32 : BaseSIMDTwoSameVector<0, U, 00, opc, 0b00, V64,
+ asm, ".2s", ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
+ def v4f32 : BaseSIMDTwoSameVector<1, U, 00, opc, 0b00, V128,
+ asm, ".4s", ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
+ def v2f64 : BaseSIMDTwoSameVector<1, U, 01, opc, 0b00, V128,
+ asm, ".2d", ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+}
+
+multiclass FRIntNNTVector<bit U, bit op, string asm,
+ SDPatternOperator OpNode = null_frag> :
+ SIMDTwoVectorSD<U, {0b1111,op}, asm, OpNode>;
+
// Supports only S element size.
multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -7236,7 +7449,7 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
let Inst{4-0} = Rd;
}
-// ARMv8.2 Index Dot product instructions
+// ARMv8.2-A Dot Product Instructions (Indexed)
class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
string lhs_kind, string rhs_kind,
RegisterOperand RegType,
@@ -7257,10 +7470,38 @@ class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
multiclass SIMDThreeSameVectorDotIndex<bit U, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b", V64,
- v2i32, v8i8, OpNode>;
- def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b", V128,
- v4i32, v16i8, OpNode>;
+ def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b",
+ V64, v2i32, v8i8, OpNode>;
+ def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b",
+ V128, v4i32, v16i8, OpNode>;
+}
+
+// ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed)
+class BaseSIMDThreeSameVectorFMLIndex<bit Q, bit U, bits<4> opc, string asm,
+ string dst_kind, string lhs_kind,
+ string rhs_kind, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDIndexedTied<Q, U, 0, 0b10, opc, RegType, RegType, V128,
+ VectorIndexH, asm, "", dst_kind, lhs_kind, rhs_kind,
+ [(set (AccumType RegType:$dst),
+ (AccumType (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType (AArch64duplane16 (v8f16 V128:$Rm),
+ VectorIndexH:$idx)))))]> {
+ // idx = H:L:M
+ bits<3> idx;
+ let Inst{11} = idx{2}; // H
+ let Inst{21} = idx{1}; // L
+ let Inst{20} = idx{0}; // M
+}
+
+multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4f16 : BaseSIMDThreeSameVectorFMLIndex<0, U, opc, asm, ".2s", ".2h", ".h",
+ V64, v2f32, v4f16, OpNode>;
+ def v8f16 : BaseSIMDThreeSameVectorFMLIndex<1, U, opc, asm, ".4s", ".4h", ".h",
+ V128, v4f32, v8f16, OpNode>;
}
multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
@@ -9748,9 +9989,10 @@ class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
let Inst{4-0} = Rd;
}
+// Armv8.3-A CompNum - Floating-point complex number support
multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
string asm, SDPatternOperator OpNode>{
- let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype,
asm, ".4h",
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
@@ -9766,7 +10008,7 @@ multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
(rottype i32:$rot)))]>;
}
- let Predicates = [HasV8_3a, HasNEON] in {
+ let Predicates = [HasComplxNum, HasNEON] in {
def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype,
asm, ".2s",
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
@@ -9822,7 +10064,7 @@ class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
Operand rottype, string asm,
SDPatternOperator OpNode> {
- let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64,
rottype, asm, ".4h",
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
@@ -9838,7 +10080,7 @@ multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
(rottype i32:$rot)))]>;
}
- let Predicates = [HasV8_3a, HasNEON] in {
+ let Predicates = [HasComplxNum, HasNEON] in {
def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64,
rottype, asm, ".2s",
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
@@ -9904,7 +10146,7 @@ class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
// classes.
multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
string asm, SDPatternOperator OpNode> {
- let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in {
+ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64,
V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h",
".4h", ".h", []> {
@@ -9920,9 +10162,9 @@ multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
let Inst{11} = idx{1};
let Inst{21} = idx{0};
}
- } // Predicates = [HasV8_3a,HasNEON,HasFullFP16]
+ } // Predicates = [HasComplxNum, HasNEON, HasFullFP16]
- let Predicates = [HasV8_3a,HasNEON] in {
+ let Predicates = [HasComplxNum, HasNEON] in {
def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2,
V128, V128, V128, VectorIndexD, rottype, asm, ".4s",
".4s", ".4s", ".s", []> {
@@ -9930,7 +10172,7 @@ multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
let Inst{11} = idx{0};
let Inst{21} = 0;
}
- } // Predicates = [HasV8_3a,HasNEON]
+ } // Predicates = [HasComplxNum, HasNEON]
}
//----------------------------------------------------------------------------
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 032d53d19620..ada067888572 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -66,7 +66,8 @@ static cl::opt<unsigned>
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
- : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
+ : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
+ AArch64::CATCHRET),
RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
@@ -108,6 +109,14 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// This gets lowered to an instruction sequence which takes 16 bytes
NumBytes = 16;
break;
+ case AArch64::JumpTableDest32:
+ case AArch64::JumpTableDest16:
+ case AArch64::JumpTableDest8:
+ NumBytes = 12;
+ break;
+ case AArch64::SPACE:
+ NumBytes = MI.getOperand(1).getImm();
+ break;
}
return NumBytes;
@@ -675,14 +684,36 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
if (!Subtarget.hasCustomCheapAsMoveHandling())
return MI.isAsCheapAsAMove();
+ const unsigned Opcode = MI.getOpcode();
+
+ // Firstly, check cases gated by features.
+
+ if (Subtarget.hasZeroCycleZeroingFP()) {
+ if (Opcode == AArch64::FMOVH0 ||
+ Opcode == AArch64::FMOVS0 ||
+ Opcode == AArch64::FMOVD0)
+ return true;
+ }
+
+ if (Subtarget.hasZeroCycleZeroingGP()) {
+ if (Opcode == TargetOpcode::COPY &&
+ (MI.getOperand(1).getReg() == AArch64::WZR ||
+ MI.getOperand(1).getReg() == AArch64::XZR))
+ return true;
+ }
+
+ // Secondly, check cases specific to sub-targets.
+
if (Subtarget.hasExynosCheapAsMoveHandling()) {
- if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
+ if (isExynosCheapAsMove(MI))
return true;
- else
- return MI.isAsCheapAsAMove();
+
+ return MI.isAsCheapAsAMove();
}
- switch (MI.getOpcode()) {
+ // Finally, check generic cases.
+
+ switch (Opcode) {
default:
return false;
@@ -723,217 +754,12 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
return canBeExpandedToORR(MI, 32);
case AArch64::MOVi64imm:
return canBeExpandedToORR(MI, 64);
-
- // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
- // feature.
- case AArch64::FMOVH0:
- case AArch64::FMOVS0:
- case AArch64::FMOVD0:
- return Subtarget.hasZeroCycleZeroing();
- case TargetOpcode::COPY:
- return (Subtarget.hasZeroCycleZeroing() &&
- (MI.getOperand(1).getReg() == AArch64::WZR ||
- MI.getOperand(1).getReg() == AArch64::XZR));
}
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
-bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
- unsigned Reg, Imm, Shift;
-
- switch (MI.getOpcode()) {
- default:
- return false;
-
- // MOV Rd, SP
- case AArch64::ADDWri:
- case AArch64::ADDXri:
- if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(2).getImm();
- return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
-
- // Literal
- case AArch64::ADR:
- case AArch64::ADRP:
- return true;
-
- // MOVI Vd, #0
- case AArch64::MOVID:
- case AArch64::MOVIv8b_ns:
- case AArch64::MOVIv2d_ns:
- case AArch64::MOVIv16b_ns:
- Imm = MI.getOperand(1).getImm();
- return (Imm == 0);
-
- // MOVI Vd, #0
- case AArch64::MOVIv2i32:
- case AArch64::MOVIv4i16:
- case AArch64::MOVIv4i32:
- case AArch64::MOVIv8i16:
- Imm = MI.getOperand(1).getImm();
- Shift = MI.getOperand(2).getImm();
- return (Imm == 0 && Shift == 0);
-
- // MOV Rd, Imm
- case AArch64::MOVNWi:
- case AArch64::MOVNXi:
-
- // MOV Rd, Imm
- case AArch64::MOVZWi:
- case AArch64::MOVZXi:
- return true;
-
- // MOV Rd, Imm
- case AArch64::ORRWri:
- case AArch64::ORRXri:
- if (!MI.getOperand(1).isReg())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(2).getImm();
- return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
-
- // MOV Rd, Rm
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- if (!MI.getOperand(1).isReg())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getShiftValue(Imm);
- return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
- }
-}
-
-bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
- unsigned Imm, Shift;
- AArch64_AM::ShiftExtendType Ext;
-
- switch (MI.getOpcode()) {
- default:
- return false;
-
- // WriteI
- case AArch64::ADDSWri:
- case AArch64::ADDSXri:
- case AArch64::ADDWri:
- case AArch64::ADDXri:
- case AArch64::SUBSWri:
- case AArch64::SUBSXri:
- case AArch64::SUBWri:
- case AArch64::SUBXri:
- return true;
-
- // WriteISReg
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- case AArch64::EONWrs:
- case AArch64::EONXrs:
- case AArch64::EORWrs:
- case AArch64::EORXrs:
- case AArch64::ORNWrs:
- case AArch64::ORNXrs:
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getShiftValue(Imm);
- Ext = AArch64_AM::getShiftType(Imm);
- return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
-
- // WriteIEReg
- case AArch64::ADDSWrx:
- case AArch64::ADDSXrx:
- case AArch64::ADDSXrx64:
- case AArch64::ADDWrx:
- case AArch64::ADDXrx:
- case AArch64::ADDXrx64:
- case AArch64::SUBSWrx:
- case AArch64::SUBSXrx:
- case AArch64::SUBSXrx64:
- case AArch64::SUBWrx:
- case AArch64::SUBXrx:
- case AArch64::SUBXrx64:
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getArithShiftValue(Imm);
- Ext = AArch64_AM::getArithExtendType(Imm);
- return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
-
- case AArch64::PRFMroW:
- case AArch64::PRFMroX:
-
- // WriteLDIdx
- case AArch64::LDRBBroW:
- case AArch64::LDRBBroX:
- case AArch64::LDRHHroW:
- case AArch64::LDRHHroX:
- case AArch64::LDRSBWroW:
- case AArch64::LDRSBWroX:
- case AArch64::LDRSBXroW:
- case AArch64::LDRSBXroX:
- case AArch64::LDRSHWroW:
- case AArch64::LDRSHWroX:
- case AArch64::LDRSHXroW:
- case AArch64::LDRSHXroX:
- case AArch64::LDRSWroW:
- case AArch64::LDRSWroX:
- case AArch64::LDRWroW:
- case AArch64::LDRWroX:
- case AArch64::LDRXroW:
- case AArch64::LDRXroX:
-
- case AArch64::LDRBroW:
- case AArch64::LDRBroX:
- case AArch64::LDRDroW:
- case AArch64::LDRDroX:
- case AArch64::LDRHroW:
- case AArch64::LDRHroX:
- case AArch64::LDRSroW:
- case AArch64::LDRSroX:
-
- // WriteSTIdx
- case AArch64::STRBBroW:
- case AArch64::STRBBroX:
- case AArch64::STRHHroW:
- case AArch64::STRHHroX:
- case AArch64::STRWroW:
- case AArch64::STRWroX:
- case AArch64::STRXroW:
- case AArch64::STRXroX:
-
- case AArch64::STRBroW:
- case AArch64::STRBroX:
- case AArch64::STRDroW:
- case AArch64::STRDroX:
- case AArch64::STRHroW:
- case AArch64::STRHroX:
- case AArch64::STRSroW:
- case AArch64::STRSroX:
- Imm = MI.getOperand(3).getImm();
- Ext = AArch64_AM::getMemExtendType(Imm);
- return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
- }
-}
-
-bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
+bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
@@ -1055,6 +881,32 @@ bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
}
}
+bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::SEH_StackAlloc:
+ case AArch64::SEH_SaveFPLR:
+ case AArch64::SEH_SaveFPLR_X:
+ case AArch64::SEH_SaveReg:
+ case AArch64::SEH_SaveReg_X:
+ case AArch64::SEH_SaveRegP:
+ case AArch64::SEH_SaveRegP_X:
+ case AArch64::SEH_SaveFReg:
+ case AArch64::SEH_SaveFReg_X:
+ case AArch64::SEH_SaveFRegP:
+ case AArch64::SEH_SaveFRegP_X:
+ case AArch64::SEH_SetFP:
+ case AArch64::SEH_AddFP:
+ case AArch64::SEH_Nop:
+ case AArch64::SEH_PrologEnd:
+ case AArch64::SEH_EpilogStart:
+ case AArch64::SEH_EpilogEnd:
+ return true;
+ }
+}
+
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
@@ -1078,7 +930,7 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned BaseRegA = 0, BaseRegB = 0;
+ MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned WidthA = 0, WidthB = 0;
@@ -1089,14 +941,14 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
- // Retrieve the base register, offset from the base register and width. Width
+ // Retrieve the base, offset from the base and width. Width
// is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
- // base registers are identical, and the offset of a lower memory access +
+ // bases are identical, and the offset of a lower memory access +
// the width doesn't overlap the offset of a higher memory access,
// then the memory accesses are different.
- if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
- getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
- if (BaseRegA == BaseRegB) {
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
@@ -1107,6 +959,26 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
return false;
}
+bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
+ return true;
+ switch (MI.getOpcode()) {
+ case AArch64::HINT:
+ // CSDB hints are scheduling barriers.
+ if (MI.getOperand(0).getImm() == 0x14)
+ return true;
+ break;
+ case AArch64::DSB:
+ case AArch64::ISB:
+ // DSB and ISB also are scheduling barriers.
+ return true;
+ default:;
+ }
+ return isSEHInstruction(MI);
+}
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
@@ -1593,11 +1465,36 @@ bool AArch64InstrInfo::substituteCmpToZero(
}
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
- if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
+ if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
+ MI.getOpcode() != AArch64::CATCHRET)
return false;
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
+
+ if (MI.getOpcode() == AArch64::CATCHRET) {
+ // Skip to the first instruction before the epilog.
+ const TargetInstrInfo *TII =
+ MBB.getParent()->getSubtarget().getInstrInfo();
+ MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
+ auto MBBI = MachineBasicBlock::iterator(MI);
+ MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
+ while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
+ FirstEpilogSEH != MBB.begin())
+ FirstEpilogSEH = std::prev(FirstEpilogSEH);
+ if (FirstEpilogSEH != MBB.begin())
+ FirstEpilogSEH = std::next(FirstEpilogSEH);
+ BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
+ .addReg(AArch64::X0, RegState::Define)
+ .addMBB(TargetMBB);
+ BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
+ .addReg(AArch64::X0, RegState::Define)
+ .addReg(AArch64::X0)
+ .addMBB(TargetMBB)
+ .addImm(0);
+ return true;
+ }
+
unsigned Reg = MI.getOperand(0).getReg();
const GlobalValue *GV =
cast<GlobalValue>((*MI.memoperands_begin())->getValue());
@@ -1607,7 +1504,7 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if ((OpFlags & AArch64II::MO_GOT) != 0) {
BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
+ .addGlobalAddress(GV, 0, OpFlags);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
@@ -1632,6 +1529,9 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(Reg, RegState::Kill)
.addImm(0)
.addMemOperand(*MI.memoperands_begin());
+ } else if (TM.getCodeModel() == CodeModel::Tiny) {
+ BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
+ .addGlobalAddress(GV, 0, OpFlags);
} else {
BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
.addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
@@ -1647,71 +1547,6 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
-/// Return true if this is this instruction has a non-zero immediate
-bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- case AArch64::EONWrs:
- case AArch64::EONXrs:
- case AArch64::EORWrs:
- case AArch64::EORXrs:
- case AArch64::ORNWrs:
- case AArch64::ORNXrs:
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- if (MI.getOperand(3).isImm()) {
- unsigned val = MI.getOperand(3).getImm();
- return (val != 0);
- }
- break;
- }
- return false;
-}
-
-/// Return true if this is this instruction has a non-zero immediate
-bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::ADDSWrx:
- case AArch64::ADDSXrx:
- case AArch64::ADDSXrx64:
- case AArch64::ADDWrx:
- case AArch64::ADDXrx:
- case AArch64::ADDXrx64:
- case AArch64::SUBSWrx:
- case AArch64::SUBSXrx:
- case AArch64::SUBSXrx64:
- case AArch64::SUBWrx:
- case AArch64::SUBXrx:
- case AArch64::SUBXrx64:
- if (MI.getOperand(3).isImm()) {
- unsigned val = MI.getOperand(3).getImm();
- return (val != 0);
- }
- break;
- }
-
- return false;
-}
-
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
@@ -1834,67 +1669,6 @@ unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return 0;
}
-/// Return true if this is load/store scales or extends its register offset.
-/// This refers to scaling a dynamic index as opposed to scaled immediates.
-/// MI should be a memory op that allows scaled addressing.
-bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::LDRBBroW:
- case AArch64::LDRBroW:
- case AArch64::LDRDroW:
- case AArch64::LDRHHroW:
- case AArch64::LDRHroW:
- case AArch64::LDRQroW:
- case AArch64::LDRSBWroW:
- case AArch64::LDRSBXroW:
- case AArch64::LDRSHWroW:
- case AArch64::LDRSHXroW:
- case AArch64::LDRSWroW:
- case AArch64::LDRSroW:
- case AArch64::LDRWroW:
- case AArch64::LDRXroW:
- case AArch64::STRBBroW:
- case AArch64::STRBroW:
- case AArch64::STRDroW:
- case AArch64::STRHHroW:
- case AArch64::STRHroW:
- case AArch64::STRQroW:
- case AArch64::STRSroW:
- case AArch64::STRWroW:
- case AArch64::STRXroW:
- case AArch64::LDRBBroX:
- case AArch64::LDRBroX:
- case AArch64::LDRDroX:
- case AArch64::LDRHHroX:
- case AArch64::LDRHroX:
- case AArch64::LDRQroX:
- case AArch64::LDRSBWroX:
- case AArch64::LDRSBXroX:
- case AArch64::LDRSHWroX:
- case AArch64::LDRSHXroX:
- case AArch64::LDRSWroX:
- case AArch64::LDRSroX:
- case AArch64::LDRWroX:
- case AArch64::LDRXroX:
- case AArch64::STRBBroX:
- case AArch64::STRBroX:
- case AArch64::STRDroX:
- case AArch64::STRHHroX:
- case AArch64::STRHroX:
- case AArch64::STRQroX:
- case AArch64::STRSroX:
- case AArch64::STRWroX:
- case AArch64::STRXroX:
-
- unsigned Val = MI.getOperand(3).getImm();
- AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
- return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
- }
- return false;
-}
-
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
@@ -2068,17 +1842,21 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
if (MI.hasOrderedMemoryRef())
return false;
- // Make sure this is a reg+imm (as opposed to an address reloc).
- assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
+ // Make sure this is a reg/fi+imm (as opposed to an address reloc).
+ assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
+ "Expected a reg or frame index operand.");
if (!MI.getOperand(2).isImm())
return false;
// Can't merge/pair if the instruction modifies the base register.
// e.g., ldr x0, [x0]
- unsigned BaseReg = MI.getOperand(1).getReg();
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- if (MI.modifiesRegister(BaseReg, TRI))
- return false;
+ // This case will never occur with an FI base.
+ if (MI.getOperand(1).isReg()) {
+ unsigned BaseReg = MI.getOperand(1).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (MI.modifiesRegister(BaseReg, TRI))
+ return false;
+ }
// Check if this load/store has a hint to avoid pair formation.
// MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
@@ -2101,25 +1879,28 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
return true;
}
-bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
+ MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
unsigned Width;
- return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
+ return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
}
-bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo *TRI) const {
+bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
+ MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ unsigned &Width, const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
if (LdSt.getNumExplicitOperands() == 3) {
// Non-paired instruction (e.g., ldr x1, [x0, #8]).
- if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
+ if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+ !LdSt.getOperand(2).isImm())
return false;
} else if (LdSt.getNumExplicitOperands() == 4) {
// Paired instruction (e.g., ldp x1, x2, [x0, #8]).
- if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
+ if (!LdSt.getOperand(1).isReg() ||
+ (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
!LdSt.getOperand(3).isImm())
return false;
} else
@@ -2138,13 +1919,18 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
// multiplied by the scaling factor. Unscaled instructions have scaling factor
// set to 1.
if (LdSt.getNumExplicitOperands() == 3) {
- BaseReg = LdSt.getOperand(1).getReg();
+ BaseOp = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm() * Scale;
} else {
assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
- BaseReg = LdSt.getOperand(2).getReg();
+ BaseOp = &LdSt.getOperand(2);
Offset = LdSt.getOperand(3).getImm() * Scale;
}
+
+ assert((BaseOp->isReg() || BaseOp->isFI()) &&
+ "getMemOperandWithOffset only supports base "
+ "operands of type register or frame index.");
+
return true;
}
@@ -2299,31 +2085,33 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
return true;
}
-// Scale the unscaled offsets. Returns false if the unscaled offset can't be
-// scaled.
-static bool scaleOffset(unsigned Opc, int64_t &Offset) {
- unsigned OffsetStride = 1;
+static unsigned getOffsetStride(unsigned Opc) {
switch (Opc) {
default:
- return false;
+ return 0;
case AArch64::LDURQi:
case AArch64::STURQi:
- OffsetStride = 16;
- break;
+ return 16;
case AArch64::LDURXi:
case AArch64::LDURDi:
case AArch64::STURXi:
case AArch64::STURDi:
- OffsetStride = 8;
- break;
+ return 8;
case AArch64::LDURWi:
case AArch64::LDURSi:
case AArch64::LDURSWi:
case AArch64::STURWi:
case AArch64::STURSi:
- OffsetStride = 4;
- break;
+ return 4;
}
+}
+
+// Scale the unscaled offsets. Returns false if the unscaled offset can't be
+// scaled.
+static bool scaleOffset(unsigned Opc, int64_t &Offset) {
+ unsigned OffsetStride = getOffsetStride(Opc);
+ if (OffsetStride == 0)
+ return false;
// If the byte-offset isn't a multiple of the stride, we can't scale this
// offset.
if (Offset % OffsetStride != 0)
@@ -2335,6 +2123,19 @@ static bool scaleOffset(unsigned Opc, int64_t &Offset) {
return true;
}
+// Unscale the scaled offsets. Returns false if the scaled offset can't be
+// unscaled.
+static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
+ unsigned OffsetStride = getOffsetStride(Opc);
+ if (OffsetStride == 0)
+ return false;
+
+ // Convert the "element" offset used by scaled pair load/store instructions
+ // into the byte-offset used by unscaled.
+ Offset *= OffsetStride;
+ return true;
+}
+
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
if (FirstOpc == SecondOpc)
return true;
@@ -2353,15 +2154,46 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
return false;
}
+static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
+ int64_t Offset1, unsigned Opcode1, int FI2,
+ int64_t Offset2, unsigned Opcode2) {
+ // Accesses through fixed stack object frame indices may access a different
+ // fixed stack slot. Check that the object offsets + offsets match.
+ if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
+ int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
+ int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
+ assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
+ // Get the byte-offset from the object offset.
+ if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
+ return false;
+ ObjectOffset1 += Offset1;
+ ObjectOffset2 += Offset2;
+ // Get the "element" index in the object.
+ if (!scaleOffset(Opcode1, ObjectOffset1) ||
+ !scaleOffset(Opcode2, ObjectOffset2))
+ return false;
+ return ObjectOffset1 + 1 == ObjectOffset2;
+ }
+
+ return FI1 == FI2;
+}
+
/// Detect opportunities for ldp/stp formation.
///
-/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
- unsigned BaseReg1,
- MachineInstr &SecondLdSt,
- unsigned BaseReg2,
+/// Only called for LdSt for which getMemOperandWithOffset returns true.
+bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
+ MachineOperand &BaseOp2,
unsigned NumLoads) const {
- if (BaseReg1 != BaseReg2)
+ MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ MachineInstr &SecondLdSt = *BaseOp2.getParent();
+ if (BaseOp1.getType() != BaseOp2.getType())
+ return false;
+
+ assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+ "Only base registers and frame indices are supported.");
+
+ // Check for both base regs and base FI.
+ if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
return false;
// Only cluster up to a single pair.
@@ -2397,7 +2229,20 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return false;
// The caller should already have ordered First/SecondLdSt by offset.
- assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
+ // Note: except for non-equal frame index bases
+ if (BaseOp1.isFI()) {
+ assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
+ "Caller should have ordered offsets.");
+
+ const MachineFrameInfo &MFI =
+ FirstLdSt.getParent()->getParent()->getFrameInfo();
+ return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
+ BaseOp2.getIndex(), Offset2, SecondOpc);
+ }
+
+ assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
+ "Caller should have ordered offsets.");
+
return Offset1 + 1 == Offset2;
}
@@ -2478,7 +2323,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
}
- } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
+ } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
@@ -2515,7 +2360,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
- } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
+ } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
@@ -2730,13 +2575,33 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
llvm_unreachable("unimplemented reg-to-reg copy");
}
+static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const MCInstrDesc &MCID,
+ unsigned SrcReg, bool IsKill,
+ unsigned SubIdx0, unsigned SubIdx1, int FI,
+ MachineMemOperand *MMO) {
+ unsigned SrcReg0 = SrcReg;
+ unsigned SrcReg1 = SrcReg;
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
+ SubIdx0 = 0;
+ SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
+ SubIdx1 = 0;
+ }
+ BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
+ .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
+ .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
void AArch64InstrInfo::storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
bool isKill, int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -2772,8 +2637,14 @@ void AArch64InstrInfo::storeRegToStackSlot(
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
else
assert(SrcReg != AArch64::SP);
- } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
+ } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRDui;
+ } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
+ storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::STPWi), SrcReg, isKill,
+ AArch64::sube32, AArch64::subo32, FI, MMO);
+ return;
+ }
break;
case 16:
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
@@ -2783,14 +2654,9 @@ void AArch64InstrInfo::storeRegToStackSlot(
Opc = AArch64::ST1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
- .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
- getKillRegState(isKill))
- .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
- getKillRegState(isKill))
- .addFrameIndex(FI)
- .addImm(0)
- .addMemOperand(MMO);
+ storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::STPXi), SrcReg, isKill,
+ AArch64::sube64, AArch64::subo64, FI, MMO);
return;
}
break;
@@ -2829,7 +2695,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI);
@@ -2838,13 +2704,35 @@ void AArch64InstrInfo::storeRegToStackSlot(
MI.addMemOperand(MMO);
}
+static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const MCInstrDesc &MCID,
+ unsigned DestReg, unsigned SubIdx0,
+ unsigned SubIdx1, int FI,
+ MachineMemOperand *MMO) {
+ unsigned DestReg0 = DestReg;
+ unsigned DestReg1 = DestReg;
+ bool IsUndef = true;
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
+ DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
+ SubIdx0 = 0;
+ DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
+ SubIdx1 = 0;
+ IsUndef = false;
+ }
+ BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
+ .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
+ .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
void AArch64InstrInfo::loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -2880,8 +2768,14 @@ void AArch64InstrInfo::loadRegFromStackSlot(
MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
else
assert(DestReg != AArch64::SP);
- } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
+ } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRDui;
+ } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
+ loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::LDPWi), DestReg, AArch64::sube32,
+ AArch64::subo32, FI, MMO);
+ return;
+ }
break;
case 16:
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
@@ -2891,14 +2785,9 @@ void AArch64InstrInfo::loadRegFromStackSlot(
Opc = AArch64::LD1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
- .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
- getDefRegState(true))
- .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
- getDefRegState(true))
- .addFrameIndex(FI)
- .addImm(0)
- .addMemOperand(MMO);
+ loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::LDPXi), DestReg, AArch64::sube64,
+ AArch64::subo64, FI, MMO);
return;
}
break;
@@ -2937,7 +2826,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(DestReg, getDefRegState(true))
.addFrameIndex(FI);
if (Offset)
@@ -2949,7 +2838,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg, int Offset,
const TargetInstrInfo *TII,
- MachineInstr::MIFlag Flag, bool SetNZCV) {
+ MachineInstr::MIFlag Flag, bool SetNZCV,
+ bool NeedsWinCFI) {
if (DestReg == SrcReg && Offset == 0)
return;
@@ -2994,6 +2884,11 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
.setMIFlag(Flag);
+ if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(ThisVal)
+ .setMIFlag(Flag);
+
SrcReg = DestReg;
Offset -= ThisVal;
if (Offset == 0)
@@ -3004,6 +2899,21 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.addImm(Offset)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
.setMIFlag(Flag);
+
+ if (NeedsWinCFI) {
+ if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
+ (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
+ if (Offset == 0)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
+ setMIFlag(Flag);
+ else
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
+ addImm(Offset).setMIFlag(Flag);
+ } else if (DestReg == AArch64::SP) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
+ addImm(Offset).setMIFlag(Flag);
+ }
+ }
}
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
@@ -4839,7 +4749,10 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
+ {MO_COFFSTUB, "aarch64-coffstub"},
+ {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
+ {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"},
+ {MO_DLLIMPORT, "aarch64-dllimport"}};
return makeArrayRef(TargetFlags);
}
@@ -4941,11 +4854,13 @@ enum MachineOutlinerClass {
enum MachineOutlinerMBBFlags {
LRUnavailableSomewhere = 0x2,
- HasCalls = 0x4
+ HasCalls = 0x4,
+ UnsafeRegsDead = 0x8
};
unsigned
AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+ assert(C.LRUWasSet && "LRU wasn't set?");
MachineFunction *MF = C.getMF();
const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
MF->getSubtarget().getRegisterInfo());
@@ -4968,17 +4883,22 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
- unsigned SequenceSize = std::accumulate(
- RepeatedSequenceLocs[0].front(),
- std::next(RepeatedSequenceLocs[0].back()),
- 0, [this](unsigned Sum, const MachineInstr &MI) {
- return Sum + getInstSizeInBytes(MI);
- });
-
- // Compute liveness information for each candidate.
+ outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
+ unsigned SequenceSize =
+ std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
+ [this](unsigned Sum, const MachineInstr &MI) {
+ return Sum + getInstSizeInBytes(MI);
+ });
+
+ // Properties about candidate MBBs that hold for all of them.
+ unsigned FlagsSetInAll = 0xF;
+
+ // Compute liveness information for each candidate, and set FlagsSetInAll.
const TargetRegisterInfo &TRI = getRegisterInfo();
std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
- [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });
+ [&FlagsSetInAll](outliner::Candidate &C) {
+ FlagsSetInAll &= C.Flags;
+ });
// According to the AArch64 Procedure Call Standard, the following are
// undefined on entry/exit from a function call:
@@ -4991,23 +4911,31 @@ AArch64InstrInfo::getOutliningCandidateInfo(
// of these registers is live into/across it. Thus, we need to delete
// those
// candidates.
- auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
+ auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
+ // If the unsafe registers in this block are all dead, then we don't need
+ // to compute liveness here.
+ if (C.Flags & UnsafeRegsDead)
+ return false;
+ C.initLRU(TRI);
LiveRegUnits LRU = C.LRU;
return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
!LRU.available(AArch64::NZCV));
};
- // Erase every candidate that violates the restrictions above. (It could be
- // true that we have viable candidates, so it's not worth bailing out in
- // the case that, say, 1 out of 20 candidates violate the restructions.)
- RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- CantGuaranteeValueAcrossCall),
- RepeatedSequenceLocs.end());
+ // Are there any candidates where those registers are live?
+ if (!(FlagsSetInAll & UnsafeRegsDead)) {
+ // Erase every candidate that violates the restrictions above. (It could be
+ // true that we have viable candidates, so it's not worth bailing out in
+    // the case that, say, 1 out of 20 candidates violates the restrictions.)
+ RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+ RepeatedSequenceLocs.end(),
+ CantGuaranteeValueAcrossCall),
+ RepeatedSequenceLocs.end());
- // If the sequence is empty, we're done.
- if (RepeatedSequenceLocs.empty())
- return outliner::OutlinedFunction();
+ // If the sequence doesn't have enough candidates left, then we're done.
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+ }
// At this point, we have only "safe" candidates to outline. Figure out
// frame + call instruction information.
@@ -5024,6 +4952,64 @@ AArch64InstrInfo::getOutliningCandidateInfo(
unsigned FrameID = MachineOutlinerDefault;
unsigned NumBytesToCreateFrame = 4;
+ bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
+ return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
+ });
+
+  // Returns true if an instruction is safe to fix up, false otherwise.
+ auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
+ if (MI.isCall())
+ return true;
+
+ if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
+ !MI.readsRegister(AArch64::SP, &TRI))
+ return true;
+
+ // Any modification of SP will break our code to save/restore LR.
+ // FIXME: We could handle some instructions which add a constant
+ // offset to SP, with a bit more work.
+ if (MI.modifiesRegister(AArch64::SP, &TRI))
+ return false;
+
+ // At this point, we have a stack instruction that we might need to
+ // fix up. We'll handle it if it's a load or store.
+ if (MI.mayLoadOrStore()) {
+ MachineOperand *Base; // Filled with the base operand of MI.
+ int64_t Offset; // Filled with the offset of MI.
+
+ // Does it allow us to offset the base operand and is the base the
+ // register SP?
+ if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
+ Base->getReg() != AArch64::SP)
+ return false;
+
+ // Find the minimum/maximum offset for this instruction and check
+ // if fixing it up would be in range.
+ int64_t MinOffset,
+ MaxOffset; // Unscaled offsets for the instruction.
+ unsigned Scale; // The scale to multiply the offsets by.
+ unsigned DummyWidth;
+ getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
+
+ Offset += 16; // Update the offset to what it would be if we outlined.
+ if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
+ return false;
+
+ // It's in range, so we can outline it.
+ return true;
+ }
+
+ // FIXME: Add handling for instructions like "add x0, sp, #8".
+
+ // We can't fix it up, so don't outline it.
+ return false;
+ };
+
+ // True if it's possible to fix up each stack instruction in this sequence.
+ // Important for frames/call variants that modify the stack.
+ bool AllStackInstrsSafe = std::all_of(
+ FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
+
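To make the range check in IsSafeToFixup concrete, take a 64-bit scaled load as an assumed example (scale 8, unsigned immediate 0..4095, the usual encoding limits): an access at [sp, #8] becomes [sp, #24] once the outlined frame spills LR, which still encodes, while an access already at the top of the range would not. The helper below is a standalone restatement, not the LLVM routine.

#include <cstdint>

// Returns true if an SP-relative byte offset still encodes after the 16-byte
// adjustment that the outlined frame's LR spill introduces.
bool fitsAfterOutlining(int64_t ByteOffset, int64_t MinImm, int64_t MaxImm,
                        int64_t Scale) {
  ByteOffset += 16; // what the offset becomes if we outline
  return ByteOffset >= MinImm * Scale && ByteOffset <= MaxImm * Scale;
}

// fitsAfterOutlining(8, 0, 4095, 8)     -> true  (24 is still encodable)
// fitsAfterOutlining(32760, 0, 4095, 8) -> false (32776 exceeds 4095 * 8)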
// If the last instruction in any candidate is a terminator, then we should
// tail call all of the candidates.
if (RepeatedSequenceLocs[0].back()->isTerminator()) {
@@ -5032,65 +5018,102 @@ AArch64InstrInfo::getOutliningCandidateInfo(
SetCandidateCallInfo(MachineOutlinerTailCall, 4);
}
- else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
+ else if (LastInstrOpcode == AArch64::BL ||
+ (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
// FIXME: Do we need to check if the code after this uses the value of LR?
FrameID = MachineOutlinerThunk;
NumBytesToCreateFrame = 0;
SetCandidateCallInfo(MachineOutlinerThunk, 4);
}
- // Make sure that LR isn't live on entry to this candidate. The only
- // instructions that use LR that could possibly appear in a repeated sequence
- // are calls. Therefore, we only have to check and see if LR is dead on entry
- // to (or exit from) some candidate.
- else if (std::all_of(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- [](outliner::Candidate &C) {
- return C.LRU.available(AArch64::LR);
- })) {
- FrameID = MachineOutlinerNoLRSave;
- NumBytesToCreateFrame = 4;
- SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
- }
-
- // LR is live, so we need to save it. Decide whether it should be saved to
- // the stack, or if it can be saved to a register.
else {
- if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
- [this](outliner::Candidate &C) {
- return findRegisterToSaveLRTo(C);
- })) {
- // Every candidate has an available callee-saved register for the save.
- // We can save LR to a register.
- FrameID = MachineOutlinerRegSave;
- NumBytesToCreateFrame = 4;
- SetCandidateCallInfo(MachineOutlinerRegSave, 12);
+ // We need to decide how to emit calls + frames. We can always emit the same
+ // frame if we don't need to save to the stack. If we have to save to the
+ // stack, then we need a different frame.
+ unsigned NumBytesNoStackCalls = 0;
+ std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
+
+ for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ C.initLRU(TRI);
+
+ // Is LR available? If so, we don't need a save.
+ if (C.LRU.available(AArch64::LR)) {
+ NumBytesNoStackCalls += 4;
+ C.setCallInfo(MachineOutlinerNoLRSave, 4);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // Is an unused register available? If so, we won't modify the stack, so
+ // we can outline with the same frame type as those that don't save LR.
+ else if (findRegisterToSaveLRTo(C)) {
+ NumBytesNoStackCalls += 12;
+ C.setCallInfo(MachineOutlinerRegSave, 12);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // Is SP used in the sequence at all? If not, we don't have to modify
+ // the stack, so we are guaranteed to get the same frame.
+ else if (C.UsedInSequence.available(AArch64::SP)) {
+ NumBytesNoStackCalls += 12;
+ C.setCallInfo(MachineOutlinerDefault, 12);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // If we outline this, we need to modify the stack. Pretend we don't
+ // outline this by saving all of its bytes.
+ else {
+ NumBytesNoStackCalls += SequenceSize;
+ }
}
- else {
- // At least one candidate does not have an available callee-saved
- // register. We must save LR to the stack.
- FrameID = MachineOutlinerDefault;
- NumBytesToCreateFrame = 4;
+    // If fixing up the stack isn't safe, or if keeping the no-stack call
+    // variants is no more costly overall, keep only the candidates that don't
+    // need a stack fixup. Otherwise, give every candidate the default call
+    // type, which saves LR on the stack.
+ if (!AllStackInstrsSafe ||
+ NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
+ RepeatedSequenceLocs = CandidatesWithoutStackFixups;
+ FrameID = MachineOutlinerNoLRSave;
+ } else {
SetCandidateCallInfo(MachineOutlinerDefault, 12);
}
+
+ // If we dropped all of the candidates, bail out here.
+ if (RepeatedSequenceLocs.size() < 2) {
+ RepeatedSequenceLocs.clear();
+ return outliner::OutlinedFunction();
+ }
}
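To make the cost comparison above concrete (the byte counts are hypothetical): with three candidates where one can leave LR alone (4 bytes), one saves LR to a spare register (12 bytes), and one would need a stack save and is therefore charged its full sequence size, say 20 bytes, NumBytesNoStackCalls is 4 + 12 + 20 = 36. Since 36 <= 3 * 12, the no-stack-fixup variant is kept and the third candidate is dropped rather than forcing every call to spill LR to the stack.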
- // Check if the range contains a call. These require a save + restore of the
- // link register.
- if (std::any_of(RepeatedSequenceLocs[0].front(),
- RepeatedSequenceLocs[0].back(),
- [](const MachineInstr &MI) { return MI.isCall(); }))
- NumBytesToCreateFrame += 8; // Save + restore the link register.
+ // Does every candidate's MBB contain a call? If so, then we might have a call
+ // in the range.
+ if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+ // Check if the range contains a call. These require a save + restore of the
+ // link register.
+ bool ModStackToSaveLR = false;
+ if (std::any_of(FirstCand.front(), FirstCand.back(),
+ [](const MachineInstr &MI) { return MI.isCall(); }))
+ ModStackToSaveLR = true;
+
+ // Handle the last instruction separately. If this is a tail call, then the
+ // last instruction is a call. We don't want to save + restore in this case.
+ // However, it could be possible that the last instruction is a call without
+ // it being valid to tail call this sequence. We should consider this as
+ // well.
+ else if (FrameID != MachineOutlinerThunk &&
+ FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
+ ModStackToSaveLR = true;
+
+ if (ModStackToSaveLR) {
+ // We can't fix up the stack. Bail out.
+ if (!AllStackInstrsSafe) {
+ RepeatedSequenceLocs.clear();
+ return outliner::OutlinedFunction();
+ }
- // Handle the last instruction separately. If this is a tail call, then the
- // last instruction is a call. We don't want to save + restore in this case.
- // However, it could be possible that the last instruction is a call without
- // it being valid to tail call this sequence. We should consider this as well.
- else if (FrameID != MachineOutlinerThunk &&
- FrameID != MachineOutlinerTailCall &&
- RepeatedSequenceLocs[0].back()->isCall())
- NumBytesToCreateFrame += 8;
+ // Save + restore LR.
+ NumBytesToCreateFrame += 8;
+ }
+ }
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
NumBytesToCreateFrame, FrameID);
@@ -5122,30 +5145,70 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
return true;
}
-unsigned
-AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
- unsigned Flags = 0x0;
- // Check if there's a call inside this MachineBasicBlock. If there is, then
- // set a flag.
- if (std::any_of(MBB.begin(), MBB.end(),
- [](MachineInstr &MI) { return MI.isCall(); }))
- Flags |= MachineOutlinerMBBFlags::HasCalls;
-
+bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
// Check if LR is available through all of the MBB. If it's not, then set
// a flag.
assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
"Suitable Machine Function for outlining must track liveness");
LiveRegUnits LRU(getRegisterInfo());
- LRU.addLiveOuts(MBB);
- std::for_each(MBB.rbegin(),
- MBB.rend(),
+ std::for_each(MBB.rbegin(), MBB.rend(),
[&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
- if (!LRU.available(AArch64::LR))
- Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+ // Check if each of the unsafe registers are available...
+ bool W16AvailableInBlock = LRU.available(AArch64::W16);
+ bool W17AvailableInBlock = LRU.available(AArch64::W17);
+ bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
+
+ // If all of these are dead (and not live out), we know we don't have to check
+ // them later.
+ if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
+ Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
+
+ // Now, add the live outs to the set.
+ LRU.addLiveOuts(MBB);
- return Flags;
+ // If any of these registers is available in the MBB, but also a live out of
+ // the block, then we know outlining is unsafe.
+ if (W16AvailableInBlock && !LRU.available(AArch64::W16))
+ return false;
+ if (W17AvailableInBlock && !LRU.available(AArch64::W17))
+ return false;
+ if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
+ return false;
+
+ // Check if there's a call inside this MachineBasicBlock. If there is, then
+ // set a flag.
+ if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
+ Flags |= MachineOutlinerMBBFlags::HasCalls;
+
+ MachineFunction *MF = MBB.getParent();
+
+ // In the event that we outline, we may have to save LR. If there is an
+ // available register in the MBB, then we'll always save LR there. Check if
+ // this is true.
+ bool CanSaveLR = false;
+ const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+
+ // Check if there is an available register across the sequence that we can
+ // use.
+ for (unsigned Reg : AArch64::GPR64RegClass) {
+ if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
+ Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
+ CanSaveLR = true;
+ break;
+ }
+ }
+
+  // Check if we have a register we can save LR to, and if LR was used
+  // somewhere. If LR is used and no register is free to hold it, then we
+  // need to evaluate the safety of outlining stack instructions later.
+ if (!CanSaveLR && !LRU.available(AArch64::LR))
+ Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+
+ return true;
}
outliner::InstrType
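The block-level screening above can be restated with plain sets (a sketch, not the LiveRegUnits API): a register that is never touched inside the block yet is live out of it would be silently clobbered by the call the outliner inserts, so the whole block is rejected; if W16, W17 and NZCV are all untouched and dead, the UnsafeRegsDead flag lets the per-candidate liveness walk be skipped later.

#include <set>
#include <string>

// Illustrative only: UsedInBlock holds registers read or written by any
// instruction in the MBB, LiveOut holds registers live on exit from it.
bool blockSafeForReg(const std::set<std::string> &UsedInBlock,
                     const std::set<std::string> &LiveOut,
                     const std::string &Reg) {
  bool UntouchedInBlock = UsedInBlock.count(Reg) == 0;
  bool IsLiveOut = LiveOut.count(Reg) != 0;
  // Untouched but still expected to hold a value afterwards: a call inserted
  // by the outliner could clobber it, so outlining from this block is unsafe.
  return !(UntouchedInBlock && IsLiveOut);
}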
@@ -5268,108 +5331,19 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
return outliner::InstrType::Illegal;
- // Does this use the stack?
- if (MI.modifiesRegister(AArch64::SP, &RI) ||
- MI.readsRegister(AArch64::SP, &RI)) {
- // True if there is no chance that any outlined candidate from this range
- // could require stack fixups. That is, both
- // * LR is available in the range (No save/restore around call)
- // * The range doesn't include calls (No save/restore in outlined frame)
- // are true.
- // FIXME: This is very restrictive; the flags check the whole block,
- // not just the bit we will try to outline.
- bool MightNeedStackFixUp =
- (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
- MachineOutlinerMBBFlags::HasCalls));
-
- // If this instruction is in a range where it *never* needs to be fixed
- // up, then we can *always* outline it. This is true even if it's not
- // possible to fix that instruction up.
- //
- // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
- // use SP. Suppose that I1 sits within a range that definitely doesn't
- // need stack fixups, while I2 sits in a range that does.
- //
- // First, I1 can be outlined as long as we *never* fix up the stack in
- // any sequence containing it. I1 is already a safe instruction in the
- // original program, so as long as we don't modify it we're good to go.
- // So this leaves us with showing that outlining I2 won't break our
- // program.
- //
- // Suppose I1 and I2 belong to equivalent candidate sequences. When we
- // look at I2, we need to see if it can be fixed up. Suppose I2, (and
- // thus I1) cannot be fixed up. Then I2 will be assigned an unique
- // integer label; thus, I2 cannot belong to any candidate sequence (a
- // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
- // as well, so we're good. Thus, I1 is always safe to outline.
- //
- // This gives us two things: first off, it buys us some more instructions
- // for our search space by deeming stack instructions illegal only when
- // they can't be fixed up AND we might have to fix them up. Second off,
- // This allows us to catch tricky instructions like, say,
- // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
- // be paired with later SUBXris, which might *not* end up being outlined.
- // If we mess with the stack to save something, then an ADDXri messes with
- // it *after*, then we aren't going to restore the right something from
- // the stack if we don't outline the corresponding SUBXri first. ADDXris and
- // SUBXris are extremely common in prologue/epilogue code, so supporting
- // them in the outliner can be a pretty big win!
- if (!MightNeedStackFixUp)
- return outliner::InstrType::Legal;
-
- // Any modification of SP will break our code to save/restore LR.
- // FIXME: We could handle some instructions which add a constant offset to
- // SP, with a bit more work.
- if (MI.modifiesRegister(AArch64::SP, &RI))
- return outliner::InstrType::Illegal;
-
- // At this point, we have a stack instruction that we might need to fix
- // up. We'll handle it if it's a load or store.
- if (MI.mayLoadOrStore()) {
- unsigned Base; // Filled with the base regiser of MI.
- int64_t Offset; // Filled with the offset of MI.
- unsigned DummyWidth;
-
- // Does it allow us to offset the base register and is the base SP?
- if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
- Base != AArch64::SP)
- return outliner::InstrType::Illegal;
-
- // Find the minimum/maximum offset for this instruction and check if
- // fixing it up would be in range.
- int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
- unsigned Scale; // The scale to multiply the offsets by.
- getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
-
- // TODO: We should really test what happens if an instruction overflows.
- // This is tricky to test with IR tests, but when the outliner is moved
- // to a MIR test, it really ought to be checked.
- Offset += 16; // Update the offset to what it would be if we outlined.
- if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
- return outliner::InstrType::Illegal;
-
- // It's in range, so we can outline it.
- return outliner::InstrType::Legal;
- }
-
- // FIXME: Add handling for instructions like "add x0, sp, #8".
-
- // We can't fix it up, so don't outline it.
- return outliner::InstrType::Illegal;
- }
-
return outliner::InstrType::Legal;
}
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : MBB) {
- unsigned Base, Width;
+ MachineOperand *Base;
+ unsigned Width;
int64_t Offset;
// Is this a load or store with an immediate offset with SP as the base?
if (!MI.mayLoadOrStore() ||
- !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
- Base != AArch64::SP)
+ !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
+ (Base->isReg() && Base->getReg() != AArch64::SP))
continue;
// It is, so we have to fix it up.
@@ -5401,7 +5375,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
TailOpcode = AArch64::TCRETURNdi;
} else {
assert(Call->getOpcode() == AArch64::BLR);
- TailOpcode = AArch64::TCRETURNri;
+ TailOpcode = AArch64::TCRETURNriALL;
}
MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
.add(Call->getOperand(0))
@@ -5562,3 +5536,6 @@ bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
MachineFunction &MF) const {
return MF.getFunction().optForMinSize();
}
+
+#define GET_INSTRINFO_HELPERS
+#include "AArch64GenInstrInfo.inc"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 11882e238b70..9954669d5675 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -62,14 +62,6 @@ public:
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- /// Returns true if there is a shiftable register and that the shift value
- /// is non-zero.
- static bool hasShiftedReg(const MachineInstr &MI);
-
- /// Returns true if there is an extendable register and that the extending
- /// value is non-zero.
- static bool hasExtendedReg(const MachineInstr &MI);
-
/// Does this instruction set its full destination register to zero?
static bool isGPRZero(const MachineInstr &MI);
@@ -79,11 +71,6 @@ public:
/// Does this instruction rename an FPR without modifying bits?
static bool isFPRCopy(const MachineInstr &MI);
- /// Return true if this is load/store scales or extends its register offset.
- /// This refers to scaling a dynamic index as opposed to scaled immediates.
- /// MI should be a memory op that allows scaled addressing.
- static bool isScaledAddr(const MachineInstr &MI);
-
/// Return true if pairing the given load or store is hinted to be
/// unprofitable.
static bool isLdStPairSuppressed(const MachineInstr &MI);
@@ -110,13 +97,13 @@ public:
/// Hint that pairing the given load or store is unprofitable.
static void suppressLdStPair(MachineInstr &MI);
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &MI, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
- bool getMemOpBaseRegImmOfsWidth(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo *TRI) const;
+ bool getMemOperandWithOffsetWidth(MachineInstr &MI, MachineOperand *&BaseOp,
+ int64_t &Offset, unsigned &Width,
+ const TargetRegisterInfo *TRI) const;
/// Return the immediate offset of the base register in a load/store \p LdSt.
MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;
@@ -128,8 +115,7 @@ public:
bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
int64_t &MinOffset, int64_t &MaxOffset) const;
- bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
- MachineInstr &SecondLdSt, unsigned BaseReg2,
+ bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
unsigned NumLoads) const override;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -189,6 +175,10 @@ public:
unsigned FalseReg) const override;
void getNoop(MCInst &NopInst) const override;
+ bool isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
@@ -242,7 +232,8 @@ public:
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
outliner::InstrType
getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override;
- unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const override;
+ bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const override;
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const override;
MachineBasicBlock::iterator
@@ -250,15 +241,15 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
- /// Returns true if the instruction sets to an immediate value that can be
- /// executed more efficiently.
- bool isExynosResetFast(const MachineInstr &MI) const;
- /// Returns true if the instruction has a shift left that can be executed
- /// more efficiently.
- bool isExynosShiftLeftFast(const MachineInstr &MI) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
- bool isFalkorShiftExtFast(const MachineInstr &MI) const;
+ static bool isFalkorShiftExtFast(const MachineInstr &MI);
+  /// Return true if the instruction is an SEH instruction used for unwinding
+ /// on Windows.
+ static bool isSEHInstruction(const MachineInstr &MI);
+
+#define GET_INSTRINFO_HELPER_DECLS
+#include "AArch64GenInstrInfo.inc"
private:
/// Sets the offsets on outlined instructions in \p MBB which use SP
@@ -286,7 +277,7 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
int Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag = MachineInstr::NoFlags,
- bool SetNZCV = false);
+ bool SetNZCV = false, bool NeedsWinCFI = false);
/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d6b8bb5d89c7..c24b8b36441b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -22,6 +22,56 @@ def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
+ AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
+def HasVH : Predicate<"Subtarget->hasVH()">,
+ AssemblerPredicate<"FeatureVH", "vh">;
+
+def HasLOR : Predicate<"Subtarget->hasLOR()">,
+ AssemblerPredicate<"FeatureLOR", "lor">;
+
+def HasPA : Predicate<"Subtarget->hasPA()">,
+ AssemblerPredicate<"FeaturePA", "pa">;
+
+def HasJS : Predicate<"Subtarget->hasJS()">,
+ AssemblerPredicate<"FeatureJS", "jsconv">;
+
+def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
+ AssemblerPredicate<"FeatureCCIDX", "ccidx">;
+
+def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
+ AssemblerPredicate<"FeatureComplxNum", "complxnum">;
+
+def HasNV : Predicate<"Subtarget->hasNV()">,
+ AssemblerPredicate<"FeatureNV", "nv">;
+
+def HasRASv8_4 : Predicate<"Subtarget->hasRASv8_4()">,
+ AssemblerPredicate<"FeatureRASv8_4", "rasv8_4">;
+
+def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
+ AssemblerPredicate<"FeatureMPAM", "mpam">;
+
+def HasDIT : Predicate<"Subtarget->hasDIT()">,
+ AssemblerPredicate<"FeatureDIT", "dit">;
+
+def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
+ AssemblerPredicate<"FeatureTRACEV8_4", "tracev8.4">;
+
+def HasAM : Predicate<"Subtarget->hasAM()">,
+ AssemblerPredicate<"FeatureAM", "am">;
+
+def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
+ AssemblerPredicate<"FeatureSEL2", "sel2">;
+
+def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
+ AssemblerPredicate<"FeatureTLB_RMI", "tlb-rmi">;
+
+def HasFMI : Predicate<"Subtarget->hasFMI()">,
+ AssemblerPredicate<"FeatureFMI", "fmi">;
+
+def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
+ AssemblerPredicate<"FeatureRCPC_IMMO", "rcpc-immo">;
+
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
@@ -49,6 +99,8 @@ def HasRDM : Predicate<"Subtarget->hasRDM()">,
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
+def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
+ AssemblerPredicate<"FeatureFP16FML", "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
AssemblerPredicate<"FeatureSPE", "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
@@ -58,7 +110,20 @@ def HasSVE : Predicate<"Subtarget->hasSVE()">,
AssemblerPredicate<"FeatureSVE", "sve">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<"FeatureRCPC", "rcpc">;
-
+def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
+ AssemblerPredicate<"FeatureAltFPCmp", "altnzcv">;
+def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
+ AssemblerPredicate<"FeatureFRInt3264", "frint3264">;
+def HasSB : Predicate<"Subtarget->hasSB()">,
+ AssemblerPredicate<"FeatureSB", "sb">;
+def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
+ AssemblerPredicate<"FeaturePredRes", "predres">;
+def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
+ AssemblerPredicate<"FeatureCacheDeepPersist", "ccdp">;
+def HasBTI : Predicate<"Subtarget->hasBTI()">,
+ AssemblerPredicate<"FeatureBranchTargetId", "bti">;
+def HasMTE : Predicate<"Subtarget->hasMTE()">,
+ AssemblerPredicate<"FeatureMTE", "mte">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def UseAlternateSExtLoadCVTF32
@@ -174,6 +239,7 @@ def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
+def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
@@ -342,6 +408,9 @@ let RecomputePerFunction = 1 in {
def NotForCodeSize : Predicate<"!MF->getFunction().optForSize()">;
// Avoid generating STRQro if it is slow, unless we're optimizing for code size.
def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">;
+
+ def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
+ def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
}
include "AArch64InstrFormats.td"
@@ -427,6 +496,38 @@ def : Pat<(AArch64LOADgot texternalsym:$addr),
def : Pat<(AArch64LOADgot tconstpool:$addr),
(LOADgot tconstpool:$addr)>;
+// 32-bit jump table destination is actually only 2 instructions since we can
+// use the table itself as a PC-relative base. But optimization occurs after
+// branch relaxation so be pessimistic.
+let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in {
+def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+}
+
+// Space-consuming pseudo to aid testing of placement and reachability
+// algorithms. Immediate operand is the number of bytes this "instruction"
+// occupies; register operands can be used to enforce dependency and constrain
+// the scheduler.
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
+def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
+ [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
+ Sched<[]>;
+
+let hasSideEffects = 1, isCodeGenOnly = 1 in {
+ def SpeculationSafeValueX
+ : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
+ def SpeculationSafeValueW
+ : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
+}
+
+
//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//
@@ -440,6 +541,8 @@ def : InstAlias<"sev", (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
+def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>;
+def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
@@ -463,11 +566,11 @@ def ISB : CRmSystemI<barrier_op, 0b110, "isb",
def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
let CRm = 0b0010;
let Inst{12} = 0;
- let Predicates = [HasV8_4a];
+ let Predicates = [HasTRACEV8_4];
}
}
-// ARMv8.2 Dot Product
+// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, "sdot", int_aarch64_neon_sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, "udot", int_aarch64_neon_udot>;
@@ -475,6 +578,18 @@ defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>;
}
+// ARMv8.2-A FP16 Fused Multiply-Add Long
+let Predicates = [HasNEON, HasFP16FML] in {
+defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
+defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
+defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
+defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
+defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
+defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
+defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
+defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
+}
+
// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">;
@@ -543,7 +658,7 @@ let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
}
// These pointer authentication instructions require armv8.3a
-let Predicates = [HasV8_3a] in {
+let Predicates = [HasPA] in {
multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
@@ -583,17 +698,17 @@ let Predicates = [HasV8_3a] in {
defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>;
defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>;
- // v8.3a floating point conversion for javascript
- let Predicates = [HasV8_3a, HasFPARMv8] in
- def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
- "fjcvtzs", []> {
- let Inst{31} = 0;
- }
+}
-} // HasV8_3a
+// v8.3a floating point conversion for javascript
+let Predicates = [HasJS, HasFPARMv8] in
+def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
+ "fjcvtzs", []> {
+ let Inst{31} = 0;
+} // HasJS, HasFPARMv8
// v8.4 Flag manipulation instructions
-let Predicates = [HasV8_4a] in {
+let Predicates = [HasFMI] in {
def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
let Inst{20-5} = 0b0000001000000000;
}
@@ -601,10 +716,39 @@ def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
"{\t$Rn, $imm, $mask}">;
-} // HasV8_4a
+} // HasFMI
+
+// v8.5 flag manipulation instructions
+let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
+
+def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
+ let Inst{18-16} = 0b000;
+ let Inst{11-8} = 0b0000;
+ let Unpredictable{11-8} = 0b1111;
+ let Inst{7-5} = 0b001;
+}
+
+def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
+ let Inst{18-16} = 0b000;
+ let Inst{11-8} = 0b0000;
+ let Unpredictable{11-8} = 0b1111;
+ let Inst{7-5} = 0b010;
+}
+} // HasAltNZCV
+
+
+// Armv8.5-A speculation barrier
+def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
+ let Inst{20-5} = 0b0001100110000111;
+ let Unpredictable{11-8} = 0b1111;
+ let Predicates = [HasSB];
+ let hasSideEffects = 1;
+}
def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
+def : InstAlias<"ssbb", (DSB 0)>;
+def : InstAlias<"pssbb", (DSB 4)>;
def MRS : MRSI;
def MSR : MSRI;
@@ -1076,6 +1220,50 @@ defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
defm : STOPregister<"stumin","LDUMIN">;// STUMINx
+// v8.5 Memory Tagging Extension
+let Predicates = [HasMTE] in {
+
+def IRG : BaseTwoOperand<0b0100, GPR64sp, "irg", null_frag, GPR64sp, GPR64>,
+ Sched<[]>{
+ let Inst{31} = 1;
+}
+def GMI : BaseTwoOperand<0b0101, GPR64, "gmi", null_frag, GPR64sp>, Sched<[]>{
+ let Inst{31} = 1;
+ let isNotDuplicable = 1;
+}
+def ADDG : AddSubG<0, "addg", null_frag>;
+def SUBG : AddSubG<1, "subg", null_frag>;
+
+def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
+
+def SUBP : SUBP<0, "subp", null_frag>, Sched<[]>;
+def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
+ let Defs = [NZCV];
+}
+
+def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
+
+def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
+def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
+
+def LDGV : MemTagVector<1, "ldgv", "\t$Rt, [$Rn]!",
+ (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn)> {
+ let DecoderMethod = "DecodeLoadAllocTagArrayInstruction";
+}
+def STGV : MemTagVector<0, "stgv", "\t$Rt, [$Rn]!",
+ (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn)>;
+
+defm STG : MemTagStore<0b00, "stg">;
+defm STZG : MemTagStore<0b01, "stzg">;
+defm ST2G : MemTagStore<0b10, "st2g">;
+defm STZ2G : MemTagStore<0b11, "stz2g">;
+
+defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
+def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
+def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
+
+} // Predicates = [HasMTE]
+
//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//
@@ -1383,7 +1571,8 @@ def : InstAlias<"cneg $dst, $src, $cc",
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
-def ADR : ADRI<0, "adr", adrlabel, []>;
+def ADR : ADRI<0, "adr", adrlabel,
+ [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0
def ADRP : ADRI<1, "adrp", adrplabel,
@@ -1391,6 +1580,10 @@ def ADRP : ADRI<1, "adrp", adrplabel,
} // isReMaterializable = 1
// page address of a constant pool entry, block address
+def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
+def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
+def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
+def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
@@ -1434,6 +1627,10 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
let AsmString = ".tlsdesccall $sym";
}
+// Pseudo instruction to tell the streamer to emit a 'B' character into the
+// augmentation string.
+def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
+
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects be dropped?
let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
@@ -1493,6 +1690,8 @@ def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>;
+def UDF : UDFType<0, "udf">;
+
//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//
@@ -1883,14 +2082,37 @@ def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
//---
// (literal)
-def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr">;
-def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr">;
-def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr">;
-def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr">;
-def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr">;
+
+def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
+ if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
+ const DataLayout &DL = MF->getDataLayout();
+ unsigned Align = G->getGlobal()->getPointerAlignment(DL);
+ return Align >= 4 && G->getOffset() % 4 == 0;
+ }
+ if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
+ return C->getAlignment() >= 4 && C->getOffset() % 4 == 0;
+ return false;
+}]>;
+
+def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
+ [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
+def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
+ [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
+def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
+ [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
+def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
+ [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
+def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
+ [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
// load sign-extended word
-def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw">;
+def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
+ [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;
+
+let AddedComplexity = 20 in {
+def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
+ (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
+}
// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
@@ -2467,8 +2689,9 @@ defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
[(truncstorei8 GPR32z:$Rt,
(am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
-// Armv8.4 LDAPR & STLR with Immediate Offset instruction
-let Predicates = [HasV8_4a] in {
+// Armv8.4 Weaker Release Consistency enhancements
+// LDAPR & STLR with Immediate Offset instructions
+let Predicates = [HasRCPC_IMMO] in {
defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>;
defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>;
defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>;
@@ -2753,7 +2976,7 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
-let Predicates = [HasV8_1a] in {
+let Predicates = [HasLOR] in {
// v8.1a "Limited Order Region" extension load-acquire instructions
def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
@@ -2886,6 +3109,13 @@ let SchedRW = [WriteFDiv] in {
defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
}
+let Predicates = [HasFRInt3264] in {
+ defm FRINT32Z : FRIntNNT<0b00, "frint32z">;
+ defm FRINT64Z : FRIntNNT<0b10, "frint64z">;
+ defm FRINT32X : FRIntNNT<0b01, "frint32x">;
+ defm FRINT64X : FRIntNNT<0b11, "frint64x">;
+} // HasFRInt3264
+
//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//
@@ -2895,18 +3125,18 @@ let SchedRW = [WriteFDiv] in {
defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
-defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>;
+defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
-defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>;
+defm FMIN : TwoOperandFPData<0b0101, "fmin", fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
-def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
@@ -2983,6 +3213,42 @@ def F128CSEL : Pseudo<(outs FPR128:$Rd),
let hasNoSchedulingInfo = 1;
}
+//===----------------------------------------------------------------------===//
+// Instructions used for emitting unwind opcodes on ARM64 Windows.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in {
+ def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
+ def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
+ def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
+}
+
+// Pseudo instructions for Windows EH
+//===----------------------------------------------------------------------===//
+let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
+ isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
+ def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
+ let usesCustomInserter = 1 in
+ def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
+ Sched<[]>;
+}
+
+let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
+def CATCHPAD : Pseudo<(outs), (ins), [(catchpad)]>, Sched<[]>;
//===----------------------------------------------------------------------===//
// Floating point immediate move.
@@ -3104,6 +3370,14 @@ defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
+
+let Predicates = [HasFRInt3264] in {
+ defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z">;
+ defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z">;
+ defm FRINT32X : FRIntNNTVector<1, 0, "frint32x">;
+ defm FRINT64X : FRIntNNTVector<1, 1, "frint64x">;
+} // HasFRInt3264
+
defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
@@ -3224,11 +3498,11 @@ defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
-defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>;
+defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
-defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>;
+defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>;
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
@@ -3895,25 +4169,6 @@ defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
-// Patterns for smull2/umull2.
-multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
- Instruction INST8B, Instruction INST4H, Instruction INST2S> {
- def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm))),
- (INST8B V128:$Rn, V128:$Rm)>;
- def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm))),
- (INST4H V128:$Rn, V128:$Rm)>;
- def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm))),
- (INST2S V128:$Rn, V128:$Rm)>;
-}
-
-defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
- SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
-defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
- UMULLv8i16_v4i32, UMULLv4i32_v2i64>;
-
// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
@@ -4004,44 +4259,43 @@ def : Pat<(concat_vectors (v2i32 V64:$Rd),
defm EXT : SIMDBitwiseExtract<"ext">;
-def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v4f16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v8f16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-
-// We use EXT to handle extract_subvector to copy the upper 64-bits of a
-// 128-bit vector.
-def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 4))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-
+def AdjustExtImm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
+ def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+ def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+ // We use EXT to handle extract_subvector to copy the upper 64-bits of a
+ // 128-bit vector.
+ def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+ // A 64-bit EXT of two halves of the same 128-bit register can be done as a
+ // single 128-bit EXT.
+ def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
+ (extract_subvector V128:$Rn, (i64 N)),
+ (i32 imm:$imm))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
+ // A 64-bit EXT of the high half of a 128-bit register can be done using a
+ // 128-bit EXT of the whole register with an adjustment to the immediate. The
+ // top half of the other operand will be unset, but that doesn't matter as it
+ // will not be used.
+ def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
+ V64:$Rm,
+ (i32 imm:$imm))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
+ (AdjustExtImm imm:$imm)), dsub)>;
+}
+
+defm : ExtPat<v8i8, v16i8, 8>;
+defm : ExtPat<v4i16, v8i16, 4>;
+defm : ExtPat<v4f16, v8f16, 4>;
+defm : ExtPat<v2i32, v4i32, 2>;
+defm : ExtPat<v2f32, v4f32, 2>;
+defm : ExtPat<v1i64, v2i64, 1>;
+defm : ExtPat<v1f64, v2f64, 1>;
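A worked example of the AdjustExtImm trick above, assuming the usual EXT semantics (result bytes are taken starting at the immediate index of the concatenation of the two sources): a 64-bit EXT of the high half of Rn with Rm and immediate 3 yields high(Rn)[3..7] followed by Rm[0..2]; the same lanes appear in the low 64 bits of a 128-bit EXT of Rn and Rm (widened into the low half of a Q register) with immediate 3 + 8 = 11. The plain C++ model below checks exactly that equivalence and is illustrative only.

#include <array>
#include <cassert>
#include <cstdint>

using V64 = std::array<uint8_t, 8>;
using V128 = std::array<uint8_t, 16>;

// Byte-level EXT: take bytes starting at Imm from the concatenation A:B.
V64 ext64(const V64 &A, const V64 &B, unsigned Imm) {
  V64 R{};
  for (unsigned I = 0; I < 8; ++I)
    R[I] = (Imm + I < 8) ? A[Imm + I] : B[Imm + I - 8];
  return R;
}
V128 ext128(const V128 &A, const V128 &B, unsigned Imm) {
  V128 R{};
  for (unsigned I = 0; I < 16; ++I)
    R[I] = (Imm + I < 16) ? A[Imm + I] : B[Imm + I - 16];
  return R;
}

int main() {
  V128 Rn;
  V64 Rm, HighRn;
  for (unsigned I = 0; I < 16; ++I) Rn[I] = uint8_t(I);
  for (unsigned I = 0; I < 8; ++I) Rm[I] = uint8_t(100 + I);
  for (unsigned I = 0; I < 8; ++I) HighRn[I] = Rn[8 + I];

  unsigned Imm = 3;
  V64 Direct = ext64(HighRn, Rm, Imm);     // 64-bit EXT of the high half

  V128 RmWide{};                           // Rm in the low half, top unset
  for (unsigned I = 0; I < 8; ++I) RmWide[I] = Rm[I];
  V128 Wide = ext128(Rn, RmWide, Imm + 8); // AdjustExtImm: 8 + imm
  V64 LowHalf;
  for (unsigned I = 0; I < 8; ++I) LowHalf[I] = Wide[I];

  assert(Direct == LowHalf);               // same lanes either way
  return 0;
}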
//----------------------------------------------------------------------------
// AdvSIMD zip vector
@@ -4137,6 +4391,12 @@ def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
+// DUP from a 64-bit register to a 64-bit register is just a copy
+def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
+ (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
+def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
+ (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;
+
def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
(v2f32 (DUPv2i32lane
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
@@ -4739,16 +4999,6 @@ def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
(MOVID imm0_255:$shift)>;
-def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>;
-
-def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>;
-
// EDIT byte mask: 2d
// The movi_edit node has the immediate value already encoded, so we use
@@ -4769,6 +5019,18 @@ def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
+// extract is free and this gives better MachineCSE results.
+def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+
+def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
@@ -5770,6 +6032,41 @@ def : Pat<(i32 (trunc GPR64sp:$src)),
// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
+// Multiply high patterns which multiply the lower subvector using smull/umull
+// and the upper subvector with smull2/umull2. Then shuffle the high the high
+// part of both results together.
+def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v16i8
+ (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
+def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v8i16
+ (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
+def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v4i32
+ (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
+
+def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v16i8
+ (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
+def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v8i16
+ (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
+def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v4i32
+ (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
+
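As a plain C++ reference for what the v8i16 pattern above computes (illustrative, not the LLVM lowering itself): each lane is the top 16 bits of the 32-bit signed product, which is exactly what the smull/smull2 halves followed by uzp2 deliver; the unsigned patterns are the same with umull/umull2.

#include <array>
#include <cstddef>
#include <cstdint>

// Element-wise "multiply high" for eight signed 16-bit lanes.
std::array<int16_t, 8> mulhs_v8i16(const std::array<int16_t, 8> &A,
                                   const std::array<int16_t, 8> &B) {
  std::array<int16_t, 8> R{};
  for (size_t I = 0; I < 8; ++I) {
    int32_t Wide = int32_t(A[I]) * int32_t(B[I]); // smull (low) / smull2 (high)
    R[I] = int16_t(Wide >> 16);                   // uzp2 keeps the high halves
  }
  return R;
}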
// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
@@ -6481,10 +6778,24 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
Sched<[WriteBrReg]>;
def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
Sched<[WriteBrReg]>;
+ // Indirect tail-call with any register allowed, used by MachineOutliner when
+ // this is proven safe.
+ // FIXME: If we have to add any more hacks like this, we should instead relax
+ // some verifier checks for outlined functions.
+ def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
+ Sched<[WriteBrReg]>;
+ // Indirect tail-call limited to only use registers (x16 and x17) which are
+ // allowed to tail-call a "BTI c" instruction.
+ def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
+ Sched<[WriteBrReg]>;
}
def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
- (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>;
+ (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
+ Requires<[NotUseBTI]>;
+def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
+ (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
+ Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index b2b500320b5c..5eb589bf66d5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -65,6 +65,16 @@ private:
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
+ // Helper to generate an equivalent of scalar_to_vector into a new register,
+ // returned via 'Dst'.
+ bool emitScalarToVector(unsigned &Dst, const LLT DstTy,
+ const TargetRegisterClass *DstRC, unsigned Scalar,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineRegisterInfo &MRI) const;
+ bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
@@ -667,7 +677,7 @@ void AArch64InstructionSelector::materializeLargeCMVal(
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineIRBuilder MIB(I);
- auto MovZ = MIB.buildInstr(AArch64::MOVZXi, &AArch64::GPR64RegClass);
+ auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
MovZ->addOperand(MF, I.getOperand(1));
MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
AArch64II::MO_NC);
@@ -779,16 +789,36 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
const unsigned CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- if (selectCompareBranch(I, MF, MRI))
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ bool ProduceNonFlagSettingCondBr =
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+ if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
return true;
- auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
- .addUse(CondReg)
- .addImm(/*bit offset=*/0)
- .addMBB(DestMBB);
+ if (ProduceNonFlagSettingCondBr) {
+ auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
+ .addUse(CondReg)
+ .addImm(/*bit offset=*/0)
+ .addMBB(DestMBB);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ } else {
+ auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
+ .addDef(AArch64::WZR)
+ .addUse(CondReg)
+ .addImm(1);
+ constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
+ auto Bcc =
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
+ .addImm(AArch64CC::EQ)
+ .addMBB(DestMBB);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
+ }
}
case TargetOpcode::G_BRINDIRECT: {
@@ -983,6 +1013,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
return true;
+ } else if (TM.getCodeModel() == CodeModel::Tiny) {
+ I.setDesc(TII.get(AArch64::ADR));
+ I.getOperand(1).setTargetFlags(OpFlags);
} else {
I.setDesc(TII.get(AArch64::MOVaddr));
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
@@ -1010,12 +1043,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
}
unsigned MemSizeInBits = MemOp.getSize() * 8;
- // FIXME: PR36018: Volatile loads in some cases are incorrectly selected by
- // folding with an extend. Until we have a G_SEXTLOAD solution bail out if
- // we hit one.
- if (Opcode == TargetOpcode::G_LOAD && MemOp.isVolatile())
- return false;
-
const unsigned PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
@@ -1525,11 +1552,178 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
+ case TargetOpcode::G_BUILD_VECTOR:
+ return selectBuildVector(I, MRI);
+ case TargetOpcode::G_MERGE_VALUES:
+ return selectMergeValues(I, MRI);
}
return false;
}
+bool AArch64InstructionSelector::emitScalarToVector(
+ unsigned &Dst, const LLT DstTy, const TargetRegisterClass *DstRC,
+ unsigned Scalar, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, MachineRegisterInfo &MRI) const {
+ Dst = MRI.createVirtualRegister(DstRC);
+
+ unsigned UndefVec = MRI.createVirtualRegister(DstRC);
+ MachineInstr &UndefMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII.get(TargetOpcode::IMPLICIT_DEF))
+ .addDef(UndefVec);
+
+ auto BuildFn = [&](unsigned SubregIndex) {
+ MachineInstr &InsMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII.get(TargetOpcode::INSERT_SUBREG))
+ .addDef(Dst)
+ .addUse(UndefVec)
+ .addUse(Scalar)
+ .addImm(SubregIndex);
+ constrainSelectedInstRegOperands(UndefMI, TII, TRI, RBI);
+ return constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+ };
+
+ switch (DstTy.getElementType().getSizeInBits()) {
+ case 32:
+ return BuildFn(AArch64::ssub);
+ case 64:
+ return BuildFn(AArch64::dsub);
+ default:
+ return false;
+ }
+}
+
+bool AArch64InstructionSelector::selectMergeValues(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
+ assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
+
+ // At the moment we only support merging two s32s into an s64.
+ if (I.getNumOperands() != 3)
+ return false;
+ if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
+ return false;
+ const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
+ if (RB.getID() != AArch64::GPRRegBankID)
+ return false;
+
+ auto *DstRC = &AArch64::GPR64RegClass;
+ unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
+ MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(SubToRegDef)
+ .addImm(0)
+ .addUse(I.getOperand(1).getReg())
+ .addImm(AArch64::sub_32);
+ unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
+  // Need to anyext the second scalar before we can use BFM.
+ MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(SubToRegDef2)
+ .addImm(0)
+ .addUse(I.getOperand(2).getReg())
+ .addImm(AArch64::sub_32);
+ MachineInstr &BFM =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(SubToRegDef)
+ .addUse(SubToRegDef2)
+ .addImm(32)
+ .addImm(31);
+ constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
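At the value level, the two SUBREG_TO_REG copies zero-extend the 32-bit pieces to 64 bits and the BFMXri (immr=32, imms=31) inserts the low 32 bits of the second piece at bit 32 of the first, operand 1 of G_MERGE_VALUES being the least-significant piece. A sketch of the value this sequence produces (not LLVM code):

#include <cstdint>

// Result of the SUBREG_TO_REG + BFMXri sequence for
// G_MERGE_VALUES(Lo: s32, Hi: s32) -> s64.
static uint64_t mergeS32Pair(uint32_t Lo, uint32_t Hi) {
  uint64_t Dst = Lo;              // SUBREG_TO_REG: zero-extend Lo
  Dst &= 0x00000000FFFFFFFFULL;   // keep bits [31:0]
  Dst |= uint64_t(Hi) << 32;      // BFM: insert Hi[31:0] at bit 32
  return Dst;                     // == Lo | (uint64_t(Hi) << 32)
}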
+bool AArch64InstructionSelector::selectBuildVector(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+  // Until more of the optimized selections are ported, just use a vector
+  // insert sequence.
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
+ unsigned EltSize = EltTy.getSizeInBits();
+ if (EltSize < 32 || EltSize > 64)
+ return false; // Don't support all element types yet.
+ const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
+ unsigned Opc;
+ unsigned SubregIdx;
+ if (RB.getID() == AArch64::GPRRegBankID) {
+ if (EltSize == 32) {
+ Opc = AArch64::INSvi32gpr;
+ SubregIdx = AArch64::ssub;
+ } else {
+ Opc = AArch64::INSvi64gpr;
+ SubregIdx = AArch64::dsub;
+ }
+ } else {
+ if (EltSize == 32) {
+ Opc = AArch64::INSvi32lane;
+ SubregIdx = AArch64::ssub;
+ } else {
+ Opc = AArch64::INSvi64lane;
+ SubregIdx = AArch64::dsub;
+ }
+ }
+
+ if (EltSize * DstTy.getNumElements() != 128)
+ return false; // Don't handle unpacked vectors yet.
+
+ unsigned DstVec = 0;
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(
+ DstTy, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
+ emitScalarToVector(DstVec, DstTy, DstRC, I.getOperand(1).getReg(),
+ *I.getParent(), I.getIterator(), MRI);
+ for (unsigned i = 2, e = DstTy.getSizeInBits() / EltSize + 1; i < e; ++i) {
+ unsigned InsDef;
+ // For the last insert re-use the dst reg of the G_BUILD_VECTOR.
+ if (i + 1 < e)
+ InsDef = MRI.createVirtualRegister(DstRC);
+ else
+ InsDef = I.getOperand(0).getReg();
+ unsigned LaneIdx = i - 1;
+ if (RB.getID() == AArch64::FPRRegBankID) {
+ unsigned ImpDef = MRI.createVirtualRegister(DstRC);
+ MachineInstr &ImpDefMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::IMPLICIT_DEF))
+ .addDef(ImpDef);
+ unsigned InsSubDef = MRI.createVirtualRegister(DstRC);
+ MachineInstr &InsSubMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::INSERT_SUBREG))
+ .addDef(InsSubDef)
+ .addUse(ImpDef)
+ .addUse(I.getOperand(i).getReg())
+ .addImm(SubregIdx);
+ MachineInstr &InsEltMI =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
+ .addDef(InsDef)
+ .addUse(DstVec)
+ .addImm(LaneIdx)
+ .addUse(InsSubDef)
+ .addImm(0);
+ constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(InsSubMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(InsEltMI, TII, TRI, RBI);
+ DstVec = InsDef;
+ } else {
+ MachineInstr &InsMI =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
+ .addDef(InsDef)
+ .addUse(DstVec)
+ .addImm(LaneIdx)
+ .addUse(I.getOperand(i).getReg());
+ constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+ DstVec = InsDef;
+ }
+ }
+ I.eraseFromParent();
+ return true;
+}
+
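The loop bound above works out to one insert per remaining lane: e is 128/EltSize + 1, so for v4s32 the index i runs from 2 to 4 and LaneIdx covers lanes 1 through 3, while lane 0 comes from the emitScalarToVector step. A value-level model of that sequence for v4s32 (illustration only, not the LLVM API):

#include <array>
#include <cstdint>

// Lane 0 comes from IMPLICIT_DEF + INSERT_SUBREG (emitScalarToVector);
// lanes 1..3 each get one INSvi32gpr/INSvi32lane, with the last insert
// writing the final destination register.
static std::array<uint32_t, 4> buildVectorV4S32(const uint32_t Elts[4]) {
  std::array<uint32_t, 4> Vec{};
  Vec[0] = Elts[0];
  for (unsigned I = 2, E = 128 / 32 + 1; I < E; ++I)
    Vec[I - 1] = Elts[I - 1]; // LaneIdx = I - 1
  return Vec;
}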
/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 327c758a7f8e..6f7fb7a8bc21 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -48,9 +48,21 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
const LLT v2s64 = LLT::vector(2, 64);
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
- .legalFor({p0, s1, s8, s16, s32, s64})
- .clampScalar(0, s1, s64)
- .widenScalarToNextPow2(0, 8);
+ .legalFor({p0, s1, s8, s16, s32, s64, v2s64})
+ .clampScalar(0, s1, s64)
+ .widenScalarToNextPow2(0, 8)
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].isVector() &&
+ (Query.Types[0].getElementType() != s64 ||
+ Query.Types[0].getNumElements() != 2);
+ },
+ [=](const LegalityQuery &Query) {
+ LLT EltTy = Query.Types[0].getElementType();
+ if (EltTy == s64)
+ return std::make_pair(0, LLT::vector(2, 64));
+ return std::make_pair(0, EltTy);
+ });
getActionDefinitionsBuilder(G_PHI)
.legalFor({p0, s16, s32, s64})
@@ -97,6 +109,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
+ getActionDefinitionsBuilder(G_FCEIL)
+ // If we don't have full FP16 support, then widen s16 to s32 if we
+ // encounter it.
+ .widenScalarIf(
+ [=, &ST](const LegalityQuery &Query) {
+ return Query.Types[0] == s16 && !ST.hasFullFP16();
+ },
+ [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
+ .legalFor({s16, s32, s64, v2s32, v4s32, v2s64});
+
getActionDefinitionsBuilder(G_INSERT)
.unsupportedIf([=](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
@@ -167,9 +189,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.unsupportedIfMemSizeNotPow2()
// Lower any remaining any-extending loads into G_ANYEXT and G_LOAD
.lowerIf([=](const LegalityQuery &Query) {
- return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].Size * 8;
+ return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
- .clampNumElements(0, v2s32, v2s32);
+ .clampNumElements(0, v2s32, v2s32)
+ .clampMaxNumElements(0, s64, 1);
getActionDefinitionsBuilder(G_STORE)
.legalForTypesWithMemSize({{s8, p0, 8},
@@ -185,9 +208,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.unsupportedIfMemSizeNotPow2()
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
- Query.Types[0].getSizeInBits() != Query.MMODescrs[0].Size * 8;
+ Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
- .clampNumElements(0, v2s32, v2s32);
+ .clampNumElements(0, v2s32, v2s32)
+ .clampMaxNumElements(0, s64, 1);
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
@@ -385,13 +409,37 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
});
}
+ getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
+ .unsupportedIf([=](const LegalityQuery &Query) {
+ const LLT &EltTy = Query.Types[1].getElementType();
+ return Query.Types[0] != EltTy;
+ })
+ .minScalar(2, s64)
+ .legalIf([=](const LegalityQuery &Query) {
+ const LLT &VecTy = Query.Types[1];
+ return VecTy == v4s32 || VecTy == v2s64;
+ });
+
+ getActionDefinitionsBuilder(G_BUILD_VECTOR)
+ .legalFor({{v4s32, s32}, {v2s64, s64}})
+ .clampNumElements(0, v4s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+
+ // Deal with larger scalar types, which will be implicitly truncated.
+ .legalIf([=](const LegalityQuery &Query) {
+ return Query.Types[0].getScalarSizeInBits() <
+ Query.Types[1].getSizeInBits();
+ })
+ .minScalarSameAs(1, 0);
+
computeTables();
verify(*ST.getInstrInfo());
}
bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
switch (MI.getOpcode()) {
default:
// No idea what to do.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
index a745b0edbc6d..77e8bdc7623c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
namespace llvm {
@@ -28,7 +29,8 @@ public:
AArch64LegalizerInfo(const AArch64Subtarget &ST);
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const override;
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 4a19ecd69103..aa732a99469c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -702,7 +702,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
.add(BaseRegOp)
.addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*MergeMI))
+ .cloneMergedMemRefs({&*I, &*MergeMI})
.setMIFlags(I->mergeFlagsWith(*MergeMI));
(void)MIB;
@@ -819,7 +819,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
.add(RegOp1)
.add(BaseRegOp)
.addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*Paired))
+ .cloneMergedMemRefs({&*I, &*Paired})
.setMIFlags(I->mergeFlagsWith(*Paired));
(void)MIB;
@@ -1338,7 +1338,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.add(getLdStRegOp(*I))
.add(getLdStBaseOp(*I))
.addImm(Value)
- .setMemRefs(I->memoperands_begin(), I->memoperands_end())
+ .setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
} else {
// Paired instruction.
@@ -1349,7 +1349,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.add(getLdStRegOp(*I, 1))
.add(getLdStBaseOp(*I))
.addImm(Value / Scale)
- .setMemRefs(I->memoperands_begin(), I->memoperands_end())
+ .setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
}
(void)MIB;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index 6c0263585933..d71359223b1b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -44,16 +45,31 @@ AArch64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
assert(TheTriple.isOSWindows() &&
"Windows is the only supported COFF target");
- bool IsIndirect = (TargetFlags & AArch64II::MO_DLLIMPORT);
+ bool IsIndirect = (TargetFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB));
if (!IsIndirect)
return Printer.getSymbol(GV);
SmallString<128> Name;
- Name = "__imp_";
+ if (TargetFlags & AArch64II::MO_DLLIMPORT)
+ Name = "__imp_";
+ else if (TargetFlags & AArch64II::MO_COFFSTUB)
+ Name = ".refptr.";
Printer.TM.getNameWithPrefix(Name, GV,
Printer.getObjFileLowering().getMangler());
- return Ctx.getOrCreateSymbol(Name);
+ MCSymbol *MCSym = Ctx.getOrCreateSymbol(Name);
+
+ if (TargetFlags & AArch64II::MO_COFFSTUB) {
+ MachineModuleInfoCOFF &MMICOFF =
+ Printer.MMI->getObjFileInfo<MachineModuleInfoCOFF>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMICOFF.getGVStubEntry(MCSym);
+
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(Printer.getSymbol(GV), true);
+ }
+
+ return MCSym;
}
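For a global foo, the two indirection flavours above resolve to different pointer symbols: dllimport references load through the import-table entry __imp_foo, while the MO_COFFSTUB (MinGW-style) path goes through .refptr.foo, with the stub entry registered so the printer can emit that pointer elsewhere. A small name sketch, ignoring the mangler and using a stand-in enum for the target flags (not the real LLVM types):

#include <string>

// Hypothetical helper mirroring the prefix choice above.
enum class IndirectKind { DLLImport, COFFStub };

static std::string indirectionSymbol(IndirectKind Kind, const std::string &GV) {
  return (Kind == IndirectKind::DLLImport ? "__imp_" : ".refptr.") + GV;
}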
MCSymbol *
@@ -173,20 +189,51 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO,
MCSymbol *Sym) const {
- AArch64MCExpr::VariantKind RefKind = AArch64MCExpr::VK_NONE;
+ uint32_t RefFlags = 0;
+
if (MO.getTargetFlags() & AArch64II::MO_TLS) {
if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGEOFF)
- RefKind = AArch64MCExpr::VK_SECREL_LO12;
+ RefFlags |= AArch64MCExpr::VK_SECREL_LO12;
else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
AArch64II::MO_HI12)
- RefKind = AArch64MCExpr::VK_SECREL_HI12;
+ RefFlags |= AArch64MCExpr::VK_SECREL_HI12;
+
+ } else if (MO.getTargetFlags() & AArch64II::MO_S) {
+ RefFlags |= AArch64MCExpr::VK_SABS;
+ } else {
+ RefFlags |= AArch64MCExpr::VK_ABS;
+ }
+
+ if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3)
+ RefFlags |= AArch64MCExpr::VK_G3;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G2)
+ RefFlags |= AArch64MCExpr::VK_G2;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G1)
+ RefFlags |= AArch64MCExpr::VK_G1;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
+ RefFlags |= AArch64MCExpr::VK_G0;
+
+  // FIXME: Currently we only set VK_NC for MO_G3/MO_G2/MO_G1/MO_G0, since
+  // setting it for the other fragments would also require computing their
+  // RefFlags correctly. That should be done in a separate patch.
+ if (MO.getTargetFlags() & AArch64II::MO_NC) {
+ auto MOFrag = (MO.getTargetFlags() & AArch64II::MO_FRAGMENT);
+ if (MOFrag == AArch64II::MO_G3 || MOFrag == AArch64II::MO_G2 ||
+ MOFrag == AArch64II::MO_G1 || MOFrag == AArch64II::MO_G0)
+ RefFlags |= AArch64MCExpr::VK_NC;
}
+
const MCExpr *Expr =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+
+ auto RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags);
+ assert(RefKind != AArch64MCExpr::VK_INVALID &&
+ "Invalid relocation requested");
Expr = AArch64MCExpr::create(Expr, RefKind, Ctx);
+
return MCOperand::createExpr(Expr);
}
@@ -253,4 +300,17 @@ void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
if (lowerOperand(MO, MCOp))
OutMI.addOperand(MCOp);
}
+
+ switch (OutMI.getOpcode()) {
+ case AArch64::CATCHRET:
+ OutMI = MCInst();
+ OutMI.setOpcode(AArch64::RET);
+ OutMI.addOperand(MCOperand::createReg(AArch64::LR));
+ break;
+ case AArch64::CLEANUPRET:
+ OutMI = MCInst();
+ OutMI.setOpcode(AArch64::RET);
+ OutMI.addOperand(MCOperand::createReg(AArch64::LR));
+ break;
+ }
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index e42214d15699..5183e7d3c0d0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include <cassert>
@@ -97,6 +98,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// attribute, in which case it is set to false at construction.
Optional<bool> HasRedZone;
+ /// ForwardedMustTailRegParms - A list of virtual and physical registers
+ /// that must be forwarded to every musttail call.
+ SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
public:
AArch64FunctionInfo() = default;
@@ -162,6 +166,19 @@ public:
unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
+ unsigned getJumpTableEntrySize(int Idx) const {
+ auto It = JumpTableEntryInfo.find(Idx);
+ if (It != JumpTableEntryInfo.end())
+ return It->second.first;
+ return 4;
+ }
+ MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const {
+ return JumpTableEntryInfo.find(Idx)->second.second;
+ }
+ void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) {
+ JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym);
+ }
+
using SetOfInstructions = SmallPtrSet<const MachineInstr *, 16>;
const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
@@ -196,10 +213,16 @@ public:
LOHRelated.insert(Args.begin(), Args.end());
}
+ SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
+ return ForwardedMustTailRegParms;
+ }
+
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
+
+ DenseMap<int, std::pair<unsigned, MCSymbol *>> JumpTableEntryInfo;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index bc0168e783be..bc596dd38b6e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -20,161 +20,175 @@ using namespace llvm;
namespace {
-// Fuse CMN, CMP, TST followed by Bcc.
+/// CMN, CMP, TST followed by Bcc
static bool isArithmeticBccPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() == AArch64::Bcc) {
- // Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
- return true;
+ if (SecondMI.getOpcode() != AArch64::Bcc)
+ return false;
- switch (FirstMI->getOpcode()) {
- case AArch64::ADDSWri:
- case AArch64::ADDSWrr:
- case AArch64::ADDSXri:
- case AArch64::ADDSXrr:
- case AArch64::ANDSWri:
- case AArch64::ANDSWrr:
- case AArch64::ANDSXri:
- case AArch64::ANDSXrr:
- case AArch64::SUBSWri:
- case AArch64::SUBSWrr:
- case AArch64::SUBSXri:
- case AArch64::SUBSXrr:
- case AArch64::BICSWrr:
- case AArch64::BICSXrr:
- return true;
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
- }
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDSWri:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
+ return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
}
+
return false;
}
-// Fuse ALU operations followed by CBZ/CBNZ.
+/// ALU operations followed by CBZ/CBNZ.
static bool isArithmeticCbzPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- unsigned SecondOpcode = SecondMI.getOpcode();
+ if (SecondMI.getOpcode() != AArch64::CBZW &&
+ SecondMI.getOpcode() != AArch64::CBZX &&
+ SecondMI.getOpcode() != AArch64::CBNZW &&
+ SecondMI.getOpcode() != AArch64::CBNZX)
+ return false;
- if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
- SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
- // Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
- return true;
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
- switch (FirstMI->getOpcode()) {
- case AArch64::ADDWri:
- case AArch64::ADDWrr:
- case AArch64::ADDXri:
- case AArch64::ADDXrr:
- case AArch64::ANDWri:
- case AArch64::ANDWrr:
- case AArch64::ANDXri:
- case AArch64::ANDXrr:
- case AArch64::EORWri:
- case AArch64::EORWrr:
- case AArch64::EORXri:
- case AArch64::EORXrr:
- case AArch64::ORRWri:
- case AArch64::ORRWrr:
- case AArch64::ORRXri:
- case AArch64::ORRXrr:
- case AArch64::SUBWri:
- case AArch64::SUBWrr:
- case AArch64::SUBXri:
- case AArch64::SUBXrr:
- return true;
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
- }
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDWri:
+ case AArch64::ADDWrr:
+ case AArch64::ADDXri:
+ case AArch64::ADDXrr:
+ case AArch64::ANDWri:
+ case AArch64::ANDWrr:
+ case AArch64::ANDXri:
+ case AArch64::ANDXrr:
+ case AArch64::EORWri:
+ case AArch64::EORWrr:
+ case AArch64::EORXri:
+ case AArch64::EORXrr:
+ case AArch64::ORRWri:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXri:
+ case AArch64::ORRXrr:
+ case AArch64::SUBWri:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXri:
+ case AArch64::SUBXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
}
+
return false;
}
-// Fuse AES crypto encoding or decoding.
+/// AES crypto encoding or decoding.
static bool isAESPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
+ switch (SecondMI.getOpcode()) {
// AES encode.
- if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::AESErr) &&
- (SecondOpcode == AArch64::AESMCrr ||
- SecondOpcode == AArch64::AESMCrrTied))
- return true;
+ case AArch64::AESMCrr:
+ case AArch64::AESMCrrTied:
+ return FirstMI == nullptr || FirstMI->getOpcode() == AArch64::AESErr;
// AES decode.
- else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::AESDrr) &&
- (SecondOpcode == AArch64::AESIMCrr ||
- SecondOpcode == AArch64::AESIMCrrTied))
+ case AArch64::AESIMCrr:
+ case AArch64::AESIMCrrTied:
+ return FirstMI == nullptr || FirstMI->getOpcode() == AArch64::AESDrr;
+ }
+
+ return false;
+}
+
+/// AESE/AESD/PMULL + EOR.
+static bool isCryptoEORPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != AArch64::EORv16i8)
+ return false;
+
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ switch (FirstMI->getOpcode()) {
+ case AArch64::AESErr:
+ case AArch64::AESDrr:
+ case AArch64::PMULLv16i8:
+ case AArch64::PMULLv8i8:
+ case AArch64::PMULLv1i64:
+ case AArch64::PMULLv2i64:
return true;
+ }
return false;
}
-// Fuse literal generation.
+/// Literal generation.
static bool isLiteralsPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
// PC relative address.
- if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::ADRP) &&
- SecondOpcode == AArch64::ADDXri)
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) &&
+ SecondMI.getOpcode() == AArch64::ADDXri)
return true;
+
// 32 bit immediate.
- else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::MOVZWi) &&
- (SecondOpcode == AArch64::MOVKWi &&
- SecondMI.getOperand(3).getImm() == 16))
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZWi) &&
+ (SecondMI.getOpcode() == AArch64::MOVKWi &&
+ SecondMI.getOperand(3).getImm() == 16))
return true;
+
// Lower half of 64 bit immediate.
- else if((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::MOVZXi) &&
- (SecondOpcode == AArch64::MOVKXi &&
- SecondMI.getOperand(3).getImm() == 16))
+  if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZXi) &&
+      (SecondMI.getOpcode() == AArch64::MOVKXi &&
+       SecondMI.getOperand(3).getImm() == 16))
return true;
+
// Upper half of 64 bit immediate.
- else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- (FirstOpcode == AArch64::MOVKXi &&
- FirstMI->getOperand(3).getImm() == 32)) &&
- (SecondOpcode == AArch64::MOVKXi &&
- SecondMI.getOperand(3).getImm() == 48))
+ if ((FirstMI == nullptr ||
+ (FirstMI->getOpcode() == AArch64::MOVKXi &&
+ FirstMI->getOperand(3).getImm() == 32)) &&
+ (SecondMI.getOpcode() == AArch64::MOVKXi &&
+ SecondMI.getOperand(3).getImm() == 48))
return true;
return false;
}
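The immediate-fusion checks above key off the MOVK shift: each MOVZ/MOVK supplies one 16-bit chunk, so MOVZWi plus MOVKWi #16 builds a 32-bit constant, and the MOVKXi #32 / MOVKXi #48 pair completes the upper half of a 64-bit one. A value-level model of those chunks (illustration only):

#include <cstdint>

// MOVZ: clear the register, then place a 16-bit chunk at the given shift.
static uint64_t movz(uint16_t Imm, unsigned Shift) {
  return uint64_t(Imm) << Shift;
}
// MOVK: keep the register, replacing only the 16-bit field at the shift.
static uint64_t movk(uint64_t Reg, uint16_t Imm, unsigned Shift) {
  Reg &= ~(uint64_t(0xFFFF) << Shift);
  return Reg | (uint64_t(Imm) << Shift);
}
// e.g. 0x1122334455667788 ==
//   movk(movk(movk(movz(0x7788, 0), 0x5566, 16), 0x3344, 32), 0x1122, 48)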
-// Fuse address generation and loads or stores.
+/// Fuse address generation and loads or stores.
static bool isAddressLdStPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- switch (SecondOpcode) {
+ switch (SecondMI.getOpcode()) {
case AArch64::STRBBui:
case AArch64::STRBui:
case AArch64::STRDui:
@@ -199,63 +213,164 @@ static bool isAddressLdStPair(const MachineInstr *FirstMI,
case AArch64::LDRSHXui:
case AArch64::LDRSWui:
// Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
+ if (FirstMI == nullptr)
return true;
- switch (FirstMI->getOpcode()) {
+ switch (FirstMI->getOpcode()) {
case AArch64::ADR:
- return (SecondMI.getOperand(2).getImm() == 0);
+ return SecondMI.getOperand(2).getImm() == 0;
case AArch64::ADRP:
return true;
}
}
+
return false;
}
-// Fuse compare and conditional select.
+/// Compare and conditional select.
static bool isCCSelectPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- unsigned SecondOpcode = SecondMI.getOpcode();
-
// 32 bits
- if (SecondOpcode == AArch64::CSELWr) {
+ if (SecondMI.getOpcode() == AArch64::CSELWr) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
+ if (FirstMI == nullptr)
return true;
if (FirstMI->definesRegister(AArch64::WZR))
switch (FirstMI->getOpcode()) {
case AArch64::SUBSWrs:
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
case AArch64::SUBSWrx:
- return (!AArch64InstrInfo::hasExtendedReg(*FirstMI));
+ return !AArch64InstrInfo::hasExtendedReg(*FirstMI);
case AArch64::SUBSWrr:
case AArch64::SUBSWri:
return true;
}
}
+
// 64 bits
- else if (SecondOpcode == AArch64::CSELXr) {
+ if (SecondMI.getOpcode() == AArch64::CSELXr) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
+ if (FirstMI == nullptr)
return true;
if (FirstMI->definesRegister(AArch64::XZR))
switch (FirstMI->getOpcode()) {
case AArch64::SUBSXrs:
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
case AArch64::SUBSXrx:
case AArch64::SUBSXrx64:
- return (!AArch64InstrInfo::hasExtendedReg(*FirstMI));
+ return !AArch64InstrInfo::hasExtendedReg(*FirstMI);
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
return true;
}
}
+
+ return false;
+}
+
+/// Arithmetic and logic.
+static bool isArithmeticLogicPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (AArch64InstrInfo::hasShiftedReg(SecondMI))
+ return false;
+
+ switch (SecondMI.getOpcode()) {
+ // Arithmetic
+ case AArch64::ADDWrr:
+ case AArch64::ADDXrr:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXrr:
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ // Logic
+ case AArch64::ANDWrr:
+ case AArch64::ANDXrr:
+ case AArch64::BICWrr:
+ case AArch64::BICXrr:
+ case AArch64::EONWrr:
+ case AArch64::EONXrr:
+ case AArch64::EORWrr:
+ case AArch64::EORXrr:
+ case AArch64::ORNWrr:
+ case AArch64::ORNXrr:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXrr:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ case AArch64::EONWrs:
+ case AArch64::EONXrs:
+ case AArch64::EORWrs:
+ case AArch64::EORXrs:
+ case AArch64::ORNWrs:
+ case AArch64::ORNXrs:
+ case AArch64::ORRWrs:
+ case AArch64::ORRXrs:
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ // Arithmetic
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDWrr:
+ case AArch64::ADDXrr:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
+ }
+ break;
+
+ // Arithmetic, setting flags.
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ // Arithmetic, not setting flags.
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDWrr:
+ case AArch64::ADDXrr:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
+ }
+ break;
+ }
+
return false;
}
-/// Check if the instr pair, FirstMI and SecondMI, should be fused
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
@@ -264,18 +379,24 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const MachineInstr &SecondMI) {
const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
+ // All checking functions assume that the 1st instr is a wildcard if it is
+ // unspecified.
if (ST.hasArithmeticBccFusion() && isArithmeticBccPair(FirstMI, SecondMI))
return true;
if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
return true;
+ if (ST.hasFuseCryptoEOR() && isCryptoEORPair(FirstMI, SecondMI))
+ return true;
if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseCCSelect() && isCCSelectPair(FirstMI, SecondMI))
return true;
+ if (ST.hasFuseArithmeticLogic() && isArithmeticLogicPair(FirstMI, SecondMI))
+ return true;
return false;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td b/contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td
new file mode 100644
index 000000000000..16ba3e4282a0
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td
@@ -0,0 +1,19 @@
+//===-- AArch64PfmCounters.td - AArch64 Hardware Counters --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the available hardware counters for AArch64.
+//
+//===----------------------------------------------------------------------===//
+
+def CpuCyclesPfmCounter : PfmCounter<"CPU_CYCLES">;
+
+def DefaultPfmCounters : ProcPfmCounters {
+ let CycleCounter = CpuCyclesPfmCounter;
+}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..3da9306e6460
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -0,0 +1,108 @@
+//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+namespace {
+class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
+public:
+ AArch64PreLegalizerCombinerInfo()
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr) {}
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ CombinerHelper Helper(Observer, B);
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD:
+ return Helper.tryCombineExtendingLoads(MI);
+ }
+
+ return false;
+}
+
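The switch above hands loads to CombinerHelper::tryCombineExtendingLoads, which, as its name suggests, folds a generic load whose users only extend it into the corresponding extending load (e.g. G_LOAD + G_SEXT becomes G_SEXTLOAD), so later passes see a single operation. The C-level equivalence it relies on (illustration only):

#include <cstdint>

// Loading a byte and sign-extending it afterwards...
static int32_t loadThenSext(const int8_t *P) {
  int8_t Narrow = *P;     // G_LOAD s8
  return int32_t(Narrow); // G_SEXT s8 -> s32
}
// ...computes the same value as one sign-extending 8-bit load.
static int32_t sextLoad(const int8_t *P) {
  return int32_t(*P);     // G_SEXTLOAD s32 from an 8-bit access
}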
+// Pass boilerplate
+// ================
+
+class AArch64PreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AArch64PreLegalizerCombiner();
+
+ StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+}
+
+void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner() : MachineFunctionPass(ID) {
+ initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ AArch64PreLegalizerCombinerInfo PCInfo;
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char AArch64PreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
+ "Combine AArch64 machine instrs before legalization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
+ "Combine AArch64 machine instrs before legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createAArch64PreLegalizeCombiner() {
+ return new AArch64PreLegalizerCombiner();
+}
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index c497669f937f..68c48a5ec216 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -389,6 +389,7 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FCEIL:
return true;
}
return false;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index a7c2c1b8125b..96ae45ae3d0d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -42,12 +43,16 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows())
+ return CSR_Win_AArch64_AAPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::GHC)
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_AArch64_NoRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
+ return CSR_AArch64_AAVPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
@@ -72,6 +77,23 @@ const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
return nullptr;
}
+void AArch64RegisterInfo::UpdateCustomCalleeSavedRegs(
+ MachineFunction &MF) const {
+ const MCPhysReg *CSRs = getCalleeSavedRegs(&MF);
+ SmallVector<MCPhysReg, 32> UpdatedCSRs;
+ for (const MCPhysReg *I = CSRs; *I; ++I)
+ UpdatedCSRs.push_back(*I);
+
+ for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
+ if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
+ UpdatedCSRs.push_back(AArch64::GPR64commonRegClass.getRegister(i));
+ }
+ }
+ // Register lists are zero-terminated.
+ UpdatedCSRs.push_back(0);
+ MF.getRegInfo().setCalleeSavedRegs(UpdatedCSRs);
+}
+
const TargetRegisterClass *
AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
unsigned Idx) const {
@@ -97,6 +119,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::CXX_FAST_TLS)
return SCS ? CSR_AArch64_CXX_TLS_Darwin_SCS_RegMask
: CSR_AArch64_CXX_TLS_Darwin_RegMask;
+ if (CC == CallingConv::AArch64_VectorCall)
+ return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
@@ -117,6 +141,30 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
return CSR_AArch64_TLS_ELF_RegMask;
}
+void AArch64RegisterInfo::UpdateCustomCallPreservedMask(MachineFunction &MF,
+ const uint32_t **Mask) const {
+ uint32_t *UpdatedMask = MF.allocateRegMask();
+ unsigned RegMaskSize = MachineOperand::getRegMaskSize(getNumRegs());
+ memcpy(UpdatedMask, *Mask, sizeof(UpdatedMask[0]) * RegMaskSize);
+
+ for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
+ if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
+ for (MCSubRegIterator SubReg(AArch64::GPR64commonRegClass.getRegister(i),
+ this, true);
+ SubReg.isValid(); ++SubReg) {
+ // See TargetRegisterInfo::getCallPreservedMask for how to interpret the
+ // register mask.
+ UpdatedMask[*SubReg / 32] |= 1u << (*SubReg % 32);
+ }
+ }
+ }
+ *Mask = UpdatedMask;
+}
+
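For reference, the mask written above uses one bit per physical register, packed into 32-bit words, and a set bit means the register is preserved across the call (the complement of what MachineOperand::clobbersPhysReg reports). A tiny sketch of the corresponding query (illustration only):

#include <cstdint>

// Register Reg lives at bit (Reg % 32) of word (Reg / 32); a set bit
// means "preserved across the call".
static bool isPreservedAcrossCall(const uint32_t *Mask, unsigned Reg) {
  return (Mask[Reg / 32] & (1u << (Reg % 32))) != 0;
}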
+const uint32_t *AArch64RegisterInfo::getNoPreservedMask() const {
+ return CSR_AArch64_NoRegs_RegMask;
+}
+
const uint32_t *
AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -147,46 +195,46 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasFP(MF) || TT.isOSDarwin())
markSuperRegs(Reserved, AArch64::W29);
- if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
- markSuperRegs(Reserved, AArch64::W18); // Platform register
-
- if (MF.getSubtarget<AArch64Subtarget>().isX20Reserved())
- markSuperRegs(Reserved, AArch64::W20); // Platform register
+ for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
+ if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
+ markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
+ }
if (hasBasePointer(MF))
markSuperRegs(Reserved, AArch64::W19);
+ // SLH uses register W16/X16 as the taint register.
+ if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ markSuperRegs(Reserved, AArch64::W16);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
unsigned Reg) const {
- const AArch64FrameLowering *TFI = getFrameLowering(MF);
+ return getReservedRegs(MF)[Reg];
+}
- switch (Reg) {
- default:
- break;
- case AArch64::SP:
- case AArch64::XZR:
- case AArch64::WSP:
- case AArch64::WZR:
- return true;
- case AArch64::X18:
- case AArch64::W18:
- return MF.getSubtarget<AArch64Subtarget>().isX18Reserved();
- case AArch64::X19:
- case AArch64::W19:
- return hasBasePointer(MF);
- case AArch64::X20:
- case AArch64::W20:
- return MF.getSubtarget<AArch64Subtarget>().isX20Reserved();
- case AArch64::FP:
- case AArch64::W29:
- return TFI->hasFP(MF) || TT.isOSDarwin();
- }
+bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
+ // FIXME: Get the list of argument registers from TableGen.
+ static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7 };
+ return std::any_of(std::begin(GPRArgRegs), std::end(GPRArgRegs),
+ [this, &MF](MCPhysReg r){return isReservedReg(MF, r);});
+}
- return false;
+void AArch64RegisterInfo::emitReservedArgRegCallError(
+ const MachineFunction &MF) const {
+ const Function &F = MF.getFunction();
+ F.getContext().diagnose(DiagnosticInfoUnsupported{F, "AArch64 doesn't support"
+ " function calls if any of the argument registers is reserved."});
+}
+
+bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ return !isReservedReg(MF, PhysReg);
}
bool AArch64RegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
@@ -211,14 +259,15 @@ unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- // In the presence of variable sized objects, if the fixed stack size is
- // large enough that referencing from the FP won't result in things being
- // in range relatively often, we can use a base pointer to allow access
+ // In the presence of variable sized objects or funclets, if the fixed stack
+ // size is large enough that referencing from the FP won't result in things
+ // being in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
+ //
// Furthermore, if variable sized objects are present and the stack needs
// to be dynamically re-aligned, the base pointer is the only
// reliable way to reference the locals.
- if (MFI.hasVarSizedObjects()) {
+ if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
if (needsStackRealignment(MF))
return true;
// Conservatively estimate whether the negative offset from the frame
@@ -449,10 +498,7 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64commonRegClassID:
return 32 - 1 // XZR/SP
- (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
- - MF.getSubtarget<AArch64Subtarget>()
- .isX18Reserved() // X18 reserved as platform register
- - MF.getSubtarget<AArch64Subtarget>()
- .isX20Reserved() // X20 reserved as platform register
+ - MF.getSubtarget<AArch64Subtarget>().getNumXRegisterReserved()
- hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 57000d37090d..c4153228a7c0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -30,7 +30,18 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
public:
AArch64RegisterInfo(const Triple &TT);
+ // FIXME: This should be tablegen'd like getDwarfRegNum is
+ int getSEHRegNum(unsigned i) const {
+ return getEncodingValue(i);
+ }
+
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+ bool isAnyArgRegReserved(const MachineFunction &MF) const;
+ void emitReservedArgRegCallError(const MachineFunction &MF) const;
+
+ void UpdateCustomCalleeSavedRegs(MachineFunction &MF) const;
+ void UpdateCustomCallPreservedMask(MachineFunction &MF,
+ const uint32_t **Mask) const;
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
@@ -54,6 +65,9 @@ public:
// normal calls, so they need a different mask to represent this.
const uint32_t *getTLSCallPreservedMask() const;
+ // Funclets on ARM64 Windows don't preserve any registers.
+ const uint32_t *getNoPreservedMask() const override;
+
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i64 first argument if the calling convention
/// is one that can (partially) model this attribute with a preserved mask
@@ -69,6 +83,8 @@ public:
const uint32_t *getWindowsStackProbePreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const override;
bool isConstantPhysReg(unsigned PhysReg) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
@@ -76,8 +92,6 @@ public:
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index bbf401b474ca..d3710cea0687 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -200,6 +200,12 @@ def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X2
X22, X23, X24, X25, X26,
X27, X28, FP, LR)>;
+// Restricted set of tail call registers, for use when branch target
+// enforcement is enabled. These are the only registers which can be used to
+// indirectly branch (not call) to the "BTI c" instruction at the start of a
+// BTI-protected function.
+def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>;
+
// GPR register classes for post increment amount of vector load/store that
// has alternate printing when Rm=31 and prints a constant immediate value
// equal to the total number of bytes transferred.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
index ecc68aed1550..f757d53b6c1c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
@@ -62,13 +62,6 @@ def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
M1UnitNAL1]>; // All simple vector
//===----------------------------------------------------------------------===//
-// Predicates.
-
-def M1BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
- MI->getOperand(0).getReg() != AArch64::LR}]>;
-def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
-
-//===----------------------------------------------------------------------===//
// Coarse scheduling model.
def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
@@ -85,14 +78,15 @@ def M1WriteAC : SchedWriteRes<[M1UnitALU,
def M1WriteAD : SchedWriteRes<[M1UnitALU,
M1UnitC]> { let Latency = 2;
let NumMicroOps = 2; }
-def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>,
- SchedVar<NoSchedPred, [M1WriteAA]>]>;
+def M1WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M1WriteA1]>,
+ SchedVar<ExynosLogicPred, [M1WriteA1]>,
+ SchedVar<NoSchedPred, [M1WriteAA]>]>;
def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
-def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkFastPred, [M1WriteAB]>,
- SchedVar<NoSchedPred, [M1WriteAC]>]>;
+def M1WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M1WriteAC]>,
+ SchedVar<NoSchedPred, [M1WriteAB]>]>;
def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
@@ -110,40 +104,27 @@ def M1WriteLD : SchedWriteRes<[M1UnitL,
let ResourceCycles = [2, 1]; }
def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
-def M1WriteLX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
- SchedVar<NoSchedPred, [M1WriteLC]>]>;
-def M1WriteLY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
- SchedVar<NoSchedPred, [M1WriteLD]>]>;
+def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteLC]>,
+ SchedVar<NoSchedPred, [M1WriteL5]>]>;
def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
def M1WriteSA : SchedWriteRes<[M1UnitS,
M1UnitFST,
- M1UnitS,
- M1UnitFST]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M1WriteSB : SchedWriteRes<[M1UnitS,
- M1UnitFST,
M1UnitA]> { let Latency = 3;
let NumMicroOps = 2; }
-def M1WriteSC : SchedWriteRes<[M1UnitS,
+def M1WriteSB : SchedWriteRes<[M1UnitS,
M1UnitFST,
M1UnitS,
M1UnitFST,
M1UnitA]> { let Latency = 3;
let NumMicroOps = 3; }
-def M1WriteSD : SchedWriteRes<[M1UnitS,
- M1UnitFST,
- M1UnitA]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M1WriteSE : SchedWriteRes<[M1UnitS,
+def M1WriteSC : SchedWriteRes<[M1UnitS,
M1UnitA]> { let Latency = 2;
let NumMicroOps = 2; }
-def M1WriteSX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
- SchedVar<NoSchedPred, [M1WriteSE]>]>;
-def M1WriteSY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
- SchedVar<NoSchedPred, [M1WriteSB]>]>;
+def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteSC]>,
+ SchedVar<NoSchedPred, [M1WriteS1]>]>;
def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
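
As a reading aid for the variants redefined above: a SchedWriteVariant tries each SchedVar's predicate in order and the first match selects the write, with NoSchedPred as the catch-all. The toy C++ model below mirrors M1WriteBX; the BLR/LR check is only a guess at what the shared ExynosBranchLinkLRPred tests, based on the per-CPU predicate deleted earlier in this hunk, and the opcode/register numbers are illustrative.

// Toy model, not LLVM code: first matching predicate picks the write class.
#include <functional>
#include <string>
#include <vector>

struct Instr { unsigned Opcode; unsigned Reg0; };     // stand-in for MachineInstr

struct SchedVarModel {
  std::function<bool(const Instr &)> Pred;            // SchedPredicate body
  std::string Write;                                  // chosen SchedWriteRes
};

std::string resolve(const std::vector<SchedVarModel> &Vars, const Instr &MI) {
  for (const auto &V : Vars)
    if (V.Pred(MI))
      return V.Write;
  return "<none>";                                    // unreachable if the last entry is NoSchedPred
}

// Mirrors M1WriteBX: a BLR whose target register is LR takes the M1WriteAC
// path, anything else falls through to M1WriteAB.
const unsigned BLR = 1, LR = 30;                      // illustrative encodings only
const std::vector<SchedVarModel> M1WriteBXModel = {
    {[](const Instr &MI) { return MI.Opcode == BLR && MI.Reg0 == LR; }, "M1WriteAC"},
    {[](const Instr &) { return true; }, "M1WriteAB"}};   // NoSchedPred
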
@@ -414,9 +395,9 @@ def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
M1UnitFST,
- M1UnitFST]> { let Latency = 14;
- let NumMicroOps = 4;
- let ResourceCycles = [1, 7, 1, 7, 1]; }
+ M1UnitFST]> { let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 7, 1, 7, 1]; }
def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
@@ -427,9 +408,17 @@ def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
M1UnitFST,
- M1UnitFST]> { let Latency = 17;
- let NumMicroOps = 7;
- let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
+ M1UnitFST]> { let Latency = 17;
+ let NumMicroOps = 7;
+ let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
+
+// Special cases.
+def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
+def M1WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M1WriteNALU1]>,
+ SchedVar<NoSchedPred, [M1WriteA1]>]>;
+
+// Fast forwarding.
+def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
// Branch instructions
def : InstRW<[M1WriteB1], (instrs Bcc)>;
@@ -439,8 +428,11 @@ def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
-def : InstRW<[M1WriteA1], (instrs COPY)>;
-def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
+def : InstRW<[M1WriteAX], (instregex ".+rx(64)?$")>;
+def : InstRW<[M1WriteAX], (instregex ".+rs$")>;
+
+// Move instructions.
+def : InstRW<[M1WriteCOPY], (instrs COPY)>;
// Divide and multiply instructions.
@@ -450,10 +442,20 @@ def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
def : InstRW<[M1WriteLB,
WriteLDHi,
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
-def : InstRW<[M1WriteLX,
- ReadAdrBase], (instregex "^PRFMro[WX]")>;
+def : InstRW<[M1WriteLC,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
+def : InstRW<[M1WriteL5,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M1WriteLC,
+ ReadAdrBase], (instrs PRFMroW)>;
+def : InstRW<[M1WriteL5,
+ ReadAdrBase], (instrs PRFMroX)>;
// Store instructions.
+def : InstRW<[M1WriteSC,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
// FP data instructions.
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
@@ -487,8 +489,10 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M1WriteLY,
- ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
+def : InstRW<[M1WriteLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
def : InstRW<[M1WriteLD,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
@@ -507,14 +511,16 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M1WriteSY,
- ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
-def : InstRW<[M1WriteSB,
+def : InstRW<[M1WriteSA,
+ ReadAdrBase], (instregex "^STR[BDHS]roW")>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BDHS]roX")>;
+def : InstRW<[M1WriteSA,
ReadAdrBase], (instregex "^STRQro[WX]")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STP[DS](post|pre)")>;
-def : InstRW<[M1WriteSC,
+def : InstRW<[M1WriteSB,
WriteAdr], (instregex "^STPQ(post|pre)")>;
// ASIMD instructions.
@@ -608,21 +614,21 @@ def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
def : InstRW<[M1WriteVLDE,
WriteAdr], (instregex "LD1i(64)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(1d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[M1WriteVLDA,
@@ -830,8 +836,6 @@ def : InstRW<[M1WriteVSTI,
WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
// Cryptography instructions.
-def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
-def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
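
The M1WriteAES/M1ReadAES pair above models fast forwarding between AES rounds: an AESMC/AESIMC that consumes the result of an AESE/AESD is modeled as seeing it one cycle early. A minimal sketch of that bookkeeping, assuming the usual ReadAdvance semantics of subtracting the advance from the producer's latency (the clamp at zero is a simplification of this sketch, not a claim about LLVM's exact formula):

// Sketch only: effective producer-to-consumer latency when the consumer's
// read operand carries a ReadAdvance for that producer's write class.
#include <algorithm>

int effectiveLatency(int WriteLatency, int ReadAdvance) {
  return std::max(0, WriteLatency - ReadAdvance);
}

// M1WriteAES has Latency = 1 and M1ReadAES advances the read by 1, so a
// dependent AESE -> AESMC pair is modeled as back-to-back:
//   effectiveLatency(1, 1) == 0
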
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index 5e5369a5a7fe..15935088a17e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -26,9 +26,6 @@ def ExynosM3Model : SchedMachineModel {
let CompleteModel = 1; // Use the default model otherwise.
list<Predicate> UnsupportedFeatures = [HasSVE];
-
- // FIXME: Remove when all errors have been fixed.
- let FullInstRWOverlapCheck = 0;
}
//===----------------------------------------------------------------------===//
@@ -107,23 +104,12 @@ def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0,
M3UnitNSHF2]>;
//===----------------------------------------------------------------------===//
-// Predicates.
-
-def M3BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
- MI->getOperand(0).isReg() &&
- MI->getOperand(0).getReg() != AArch64::LR}]>;
-def M3ResetFastPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>;
-def M3RotateRightFastPred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri ||
- MI->getOpcode() == AArch64::EXTRXrri) &&
- MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>;
-def M3ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
-
-//===----------------------------------------------------------------------===//
// Coarse scheduling model.
def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0;
let NumMicroOps = 1; }
+def M3WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
+ let NumMicroOps = 0; }
def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; }
def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2;
@@ -140,15 +126,23 @@ def M3WriteAD : SchedWriteRes<[M3UnitALU,
let NumMicroOps = 2; }
def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; }
def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; }
-def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
- SchedVar<M3ShiftLeftFastPred, [M3WriteA1]>,
- SchedVar<NoSchedPred, [M3WriteAA]>]>;
-def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotateRightFastPred, [M3WriteA1]>,
- SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+ SchedVar<ExynosArithPred, [M3WriteA1]>,
+ SchedVar<ExynosLogicPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M3WriteA1]>,
+ SchedVar<ExynosLogicPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
-def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkFastPred, [M3WriteAB]>,
- SchedVar<NoSchedPred, [M3WriteAC]>]>;
+def M3WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M3WriteAC]>,
+ SchedVar<NoSchedPred, [M3WriteAB]>]>;
def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; }
def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; }
@@ -165,44 +159,38 @@ def M3WriteLC : SchedWriteRes<[M3UnitA,
def M3WriteLD : SchedWriteRes<[M3UnitA,
M3UnitL]> { let Latency = 4;
let NumMicroOps = 2; }
+def M3WriteLE : SchedWriteRes<[M3UnitA,
+ M3UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
def M3WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
-
-def M3WriteLX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteL5]>,
- SchedVar<NoSchedPred, [M3WriteLB]>]>;
+def M3WriteLX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteL5]>,
+ SchedVar<NoSchedPred, [M3WriteL4]>]>;
def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; }
def M3WriteSA : SchedWriteRes<[M3UnitA,
M3UnitS,
- M3UnitFST]> { let Latency = 2;
+ M3UnitFST]> { let Latency = 3;
let NumMicroOps = 2; }
def M3WriteSB : SchedWriteRes<[M3UnitA,
- M3UnitS]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M3WriteSC : SchedWriteRes<[M3UnitA,
M3UnitS]> { let Latency = 2;
let NumMicroOps = 2; }
-def M3WriteSX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
- SchedVar<NoSchedPred, [M3WriteSB]>]>;
-def M3WriteSY : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
- SchedVar<NoSchedPred, [M3WriteSC]>]>;
-
-def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
- SchedVar<NoSchedPred, [ReadDefault]>]>;
+def M3ReadAdrBase : SchedReadVariant<[SchedVar<ExynosScaledIdxPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
// Branch instructions.
def : SchedAlias<WriteBr, M3WriteZ0>;
-def : WriteRes<WriteBrReg, [M3UnitC]> { let Latency = 1; }
+def : SchedAlias<WriteBrReg, M3WriteC1>;
// Arithmetic and logical integer instructions.
-def : WriteRes<WriteI, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteISReg, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIEReg, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIS, [M3UnitALU]> { let Latency = 1; }
+def : SchedAlias<WriteI, M3WriteA1>;
+def : SchedAlias<WriteISReg, M3WriteA1>;
+def : SchedAlias<WriteIEReg, M3WriteA1>;
+def : SchedAlias<WriteIS, M3WriteA1>;
// Move instructions.
-def : WriteRes<WriteImm, [M3UnitALU]> { let Latency = 1; }
+def : SchedAlias<WriteImm, M3WriteA1>;
// Divide and multiply instructions.
def : WriteRes<WriteID32, [M3UnitC,
@@ -216,26 +204,23 @@ def : WriteRes<WriteIM64, [M3UnitC]> { let Latency = 4;
let ResourceCycles = [2]; }
// Miscellaneous instructions.
-def : WriteRes<WriteExtr, [M3UnitALU,
- M3UnitALU]> { let Latency = 1;
- let NumMicroOps = 2; }
+def : SchedAlias<WriteExtr, M3WriteAY>;
// Addressing modes.
-def : WriteRes<WriteAdr, []> { let Latency = 1;
- let NumMicroOps = 0; }
+def : SchedAlias<WriteAdr, M3WriteZ1>;
def : SchedAlias<ReadAdrBase, M3ReadAdrBase>;
// Load instructions.
def : SchedAlias<WriteLD, M3WriteL4>;
def : WriteRes<WriteLDHi, []> { let Latency = 4;
let NumMicroOps = 0; }
-def : SchedAlias<WriteLDIdx, M3WriteLX>;
+def : SchedAlias<WriteLDIdx, M3WriteLB>;
// Store instructions.
def : SchedAlias<WriteST, M3WriteS1>;
def : SchedAlias<WriteSTP, M3WriteS1>;
def : SchedAlias<WriteSTX, M3WriteS1>;
-def : SchedAlias<WriteSTIdx, M3WriteSX>;
+def : SchedAlias<WriteSTIdx, M3WriteSB>;
// FP data instructions.
def : WriteRes<WriteF, [M3UnitFADD]> { let Latency = 2; }
@@ -245,7 +230,6 @@ def : WriteRes<WriteFDiv, [M3UnitFDIV]> { let Latency = 12;
def : WriteRes<WriteFMul, [M3UnitFMAC]> { let Latency = 4; }
// FP miscellaneous instructions.
-// TODO: Conversion between register files is much different.
def : WriteRes<WriteFCvt, [M3UnitFCVT]> { let Latency = 3; }
def : WriteRes<WriteFImm, [M3UnitNALU]> { let Latency = 1; }
def : WriteRes<WriteFCopy, [M3UnitNALU]> { let Latency = 1; }
@@ -481,11 +465,15 @@ def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
// Special cases.
def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
+def M3WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M3WriteNALU1]>,
+ SchedVar<NoSchedPred, [M3WriteZ0]>]>;
+def M3WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
+
+// Fast forwarding.
def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>;
def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4,
M3WriteFMAC5]>;
-def M3WriteMOVI : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
- SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>;
// Branch instructions
@@ -496,29 +484,40 @@ def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>;
def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
-def : InstRW<[M3WriteA1], (instrs COPY)>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
+def : InstRW<[M3WriteAU], (instrs ORRWrs, ORRXrs)>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
+def : InstRW<[M3WriteAV], (instrs ADDWri, ADDXri)>;
+def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>;
// Move instructions.
-def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
-def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
+def : InstRW<[M3WriteCOPY], (instrs COPY)>;
+def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
+def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
// Divide and multiply instructions.
// Miscellaneous instructions.
-def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>;
// Load instructions.
def : InstRW<[M3WriteLD,
WriteLDHi,
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M3WriteLB,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
+def : InstRW<[M3WriteLX,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M3WriteLB,
+ ReadAdrBase], (instrs PRFMroW)>;
def : InstRW<[M3WriteLX,
- ReadAdrBase], (instregex "^PRFMro[WX]")>;
+ ReadAdrBase], (instrs PRFMroX)>;
// Store instructions.
+def : InstRW<[M3WriteSB,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
// FP data instructions.
def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>;
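
The hunk above splits each register-offset form by its index register: a W (extended) index is assigned the heavier A-unit write, while an X index goes through a variant that only pays extra for a scaled index. A small standalone illustration of that mapping for the M3 integer loads (write-class names only; the resources and latencies behind M3WriteLB are defined earlier in the file and not restated here):

// Illustration of the M3 load entries above, not LLVM code: which write class
// an integer LDR with a register offset is assigned to.
#include <string>

enum class IndexKind { WExtended, XScaled, XUnscaled };

std::string m3IntegerLoadWrite(IndexKind Kind) {
  switch (Kind) {
  case IndexKind::WExtended: return "M3WriteLB";   // LDR*roW
  case IndexKind::XScaled:   return "M3WriteL5";   // LDR*roX via M3WriteLX, scaled index
  case IndexKind::XUnscaled: return "M3WriteL4";   // LDR*roX via M3WriteLX, otherwise
  }
  return "";
}

// The stores follow the same pattern: STR*roW maps to M3WriteSB, STR*roX to
// the generic WriteST.
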
@@ -555,9 +554,11 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M3WriteLX,
- ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
-def : InstRW<[M3WriteLB,
+def : InstRW<[M3WriteLE,
+ ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
+def : InstRW<[M3WriteLE,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
M3WriteLH], (instregex "^LDN?P[DS]i")>;
@@ -575,8 +576,10 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M3WriteSY,
- ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
+def : InstRW<[M3WriteSA,
+ ReadAdrBase], (instregex "^STR[BDHS]roW")>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BDHS]roX")>;
def : InstRW<[M3WriteSA,
ReadAdrBase], (instregex "^STRQro[WX]")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
@@ -588,7 +591,7 @@ def : InstRW<[M3WriteSA,
// ASIMD instructions.
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ABAL?v")>;
def : InstRW<[M3WriteNMSC1], (instregex "^[SU]ABDL?v")>;
-def : InstRW<[M3WriteNMSC1], (instregex "^(SQ)?(ABS|NEG)v")>;
+def : InstRW<[M3WriteNMSC1], (instregex "^((SQ)?ABS|SQNEG)v")>;
def : InstRW<[M3WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Pv")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]H(ADD|SUB)v")>;
@@ -597,7 +600,6 @@ def : InstRW<[M3WriteNMSC3], (instregex "^R?(ADD|SUB)HN2?v")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]Q(ADD|SUB)v")>;
def : InstRW<[M3WriteNMSC3], (instregex "^(SU|US)QADDv")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]RHADDv")>;
-def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Vv")>;
def : InstRW<[M3WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
def : InstRW<[M3WriteNALU1], (instregex "^CMTSTv")>;
def : InstRW<[M3WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
@@ -668,108 +670,108 @@ def : InstRW<[M3WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
// ASIMD load instructions.
def : InstRW<[M3WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDD], (instregex "LD1i(8|16|32)$")>;
def : InstRW<[M3WriteVLDD,
- WriteAdr], (instregex "LD1i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD1i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDE], (instregex "LD1i(64)$")>;
def : InstRW<[M3WriteVLDE,
- WriteAdr], (instregex "LD1i(64)_POST")>;
+ M3WriteA1], (instregex "LD1i(64)_POST")>;
def : InstRW<[M3WriteL5], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[M3WriteVLDF,
- WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST")>;
+ M3WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST")>;
def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDF,
- WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDG], (instregex "LD2i(8|16|32)$")>;
def : InstRW<[M3WriteVLDG,
- WriteAdr], (instregex "LD2i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD2i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDH], (instregex "LD2i(64)$")>;
def : InstRW<[M3WriteVLDH,
- WriteAdr], (instregex "LD2i(64)_POST")>;
+ M3WriteA1], (instregex "LD2i(64)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
def : InstRW<[M3WriteVLDI,
- WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST")>;
+ M3WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST")>;
def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDI,
- WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
def : InstRW<[M3WriteVLDJ,
- WriteAdr], (instregex "LD3i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD3i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDL], (instregex "LD3i(64)$")>;
def : InstRW<[M3WriteVLDL,
- WriteAdr], (instregex "LD3i(64)_POST")>;
+ M3WriteA1], (instregex "LD3i(64)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[M3WriteVLDN,
- WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
+ M3WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDN,
- WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDK], (instregex "LD4i(8|16|32)$")>;
def : InstRW<[M3WriteVLDK,
- WriteAdr], (instregex "LD4i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD4i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDM], (instregex "LD4i(64)$")>;
def : InstRW<[M3WriteVLDM,
- WriteAdr], (instregex "LD4i(64)_POST")>;
+ M3WriteA1], (instregex "LD4i(64)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
// ASIMD store instructions.
def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
new file mode 100644
index 000000000000..4d892465b3f2
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -0,0 +1,1004 @@
+//=- AArch64SchedExynosM4.td - Samsung Exynos M4 Sched Defs --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Samsung Exynos M4 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// The Exynos-M4 is an advanced superscalar microprocessor with a 6-wide,
+// in-order front end for decode and dispatch and a wider issue stage.
+// The execution units, as well as loads and stores, operate out of order.
+
+def ExynosM4Model : SchedMachineModel {
+ let IssueWidth = 6; // Up to 6 uops per cycle.
+ let MicroOpBufferSize = 228; // ROB size.
+ let LoopMicroOpBufferSize = 48; // Based on the instruction queue size.
+ let LoadLatency = 4; // Optimistic load cases.
+ let MispredictPenalty = 16; // Minimum branch misprediction penalty.
+ let CompleteModel = 1; // Use the default model otherwise.
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on the Exynos-M4.
+
+let SchedModel = ExynosM4Model in {
+
+def M4UnitA : ProcResource<2>; // Simple integer
+def M4UnitC : ProcResource<2>; // Simple and complex integer
+let Super = M4UnitC, BufferSize = 1 in
+def M4UnitD : ProcResource<1>; // Integer division (inside C0, serialized)
+let Super = M4UnitC in
+def M4UnitE : ProcResource<1>; // CRC (inside C0)
+def M4UnitB : ProcResource<2>; // Branch
+def M4UnitL0 : ProcResource<1>; // Load
+def M4UnitS0 : ProcResource<1>; // Store
+def M4PipeLS : ProcResource<1>; // Load/Store
+let Super = M4PipeLS in {
+ def M4UnitL1 : ProcResource<1>;
+ def M4UnitS1 : ProcResource<1>;
+}
+def M4PipeF0 : ProcResource<1>; // FP #0
+let Super = M4PipeF0 in {
+ def M4UnitFMAC0 : ProcResource<1>; // FP multiplication
+ def M4UnitFADD0 : ProcResource<1>; // Simple FP
+ def M4UnitFCVT0 : ProcResource<1>; // FP conversion
+ def M4UnitNALU0 : ProcResource<1>; // Simple vector
+ def M4UnitNHAD : ProcResource<1>; // Horizontal vector
+ def M4UnitNMSC : ProcResource<1>; // FP and vector miscellanea
+ def M4UnitNMUL0 : ProcResource<1>; // Vector multiplication
+ def M4UnitNSHT0 : ProcResource<1>; // Vector shifting
+ def M4UnitNSHF0 : ProcResource<1>; // Vector shuffling
+ def M4UnitNCRY0 : ProcResource<1>; // Cryptographic
+}
+def M4PipeF1 : ProcResource<1>; // FP #1
+let Super = M4PipeF1 in {
+ def M4UnitFMAC1 : ProcResource<1>; // FP multiplication
+ def M4UnitFADD1 : ProcResource<1>; // Simple FP
+ def M4UnitFDIV0 : ProcResource<2>; // FP division (serialized)
+ def M4UnitFSQR0 : ProcResource<2>; // FP square root (serialized)
+ def M4UnitFST0 : ProcResource<1>; // FP store
+ def M4UnitNALU1 : ProcResource<1>; // Simple vector
+ def M4UnitNSHT1 : ProcResource<1>; // Vector shifting
+ def M4UnitNSHF1 : ProcResource<1>; // Vector shuffling
+}
+def M4PipeF2 : ProcResource<1>; // FP #2
+let Super = M4PipeF2 in {
+ def M4UnitFMAC2 : ProcResource<1>; // FP multiplication
+ def M4UnitFADD2 : ProcResource<1>; // Simple FP
+ def M4UnitFCVT1 : ProcResource<1>; // FP conversion
+ def M4UnitFDIV1 : ProcResource<2>; // FP division (serialized)
+ def M4UnitFSQR1 : ProcResource<2>; // FP square root (serialized)
+ def M4UnitFST1 : ProcResource<1>; // FP store
+ def M4UnitNALU2 : ProcResource<1>; // Simple vector
+ def M4UnitNMUL1 : ProcResource<1>; // Vector multiplication
+ def M4UnitNSHT2 : ProcResource<1>; // Vector shifting
+ def M4UnitNCRY1 : ProcResource<1>; // Cryptographic
+}
+
+def M4UnitALU : ProcResGroup<[M4UnitA,
+ M4UnitC]>;
+def M4UnitL : ProcResGroup<[M4UnitL0,
+ M4UnitL1]>;
+def M4UnitS : ProcResGroup<[M4UnitS0,
+ M4UnitS1]>;
+def M4UnitFMAC : ProcResGroup<[M4UnitFMAC0,
+ M4UnitFMAC1,
+ M4UnitFMAC2]>;
+def M4UnitFMACH : ProcResGroup<[M4UnitFMAC0,
+ M4UnitFMAC1]>;
+def M4UnitFADD : ProcResGroup<[M4UnitFADD0,
+ M4UnitFADD1,
+ M4UnitFADD2]>;
+def M4UnitFADDH : ProcResGroup<[M4UnitFADD0,
+ M4UnitFADD1]>;
+def M4UnitFCVT : ProcResGroup<[M4UnitFCVT0,
+ M4UnitFCVT1]>;
+def M4UnitFCVTH : ProcResGroup<[M4UnitFCVT0]>;
+def M4UnitFDIV : ProcResGroup<[M4UnitFDIV0,
+ M4UnitFDIV1]>;
+def M4UnitFDIVH : ProcResGroup<[M4UnitFDIV0]>;
+def M4UnitFSQR : ProcResGroup<[M4UnitFSQR0,
+ M4UnitFSQR1]>;
+def M4UnitFSQRH : ProcResGroup<[M4UnitFSQR0]>;
+def M4UnitFST : ProcResGroup<[M4UnitFST0,
+ M4UnitFST1]>;
+def M4UnitNALU : ProcResGroup<[M4UnitNALU0,
+ M4UnitNALU1,
+ M4UnitNALU2]>;
+def M4UnitNALUH : ProcResGroup<[M4UnitNALU0,
+ M4UnitNALU1]>;
+def M4UnitNMUL : ProcResGroup<[M4UnitNMUL0,
+ M4UnitNMUL1]>;
+def M4UnitNSHT : ProcResGroup<[M4UnitNSHT0,
+ M4UnitNSHT1,
+ M4UnitNSHT2]>;
+def M4UnitNSHF : ProcResGroup<[M4UnitNSHF0,
+ M4UnitNSHF1]>;
+def M4UnitNSHFH : ProcResGroup<[M4UnitNSHF0]>;
+def M4UnitNCRY : ProcResGroup<[M4UnitNCRY0,
+ M4UnitNCRY1]>;
+
+//===----------------------------------------------------------------------===//
+// Resource details.
+
+def M4WriteZ0 : SchedWriteRes<[]> { let Latency = 0; }
+def M4WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
+ let NumMicroOps = 0; }
+def M4WriteZ4 : SchedWriteRes<[]> { let Latency = 4;
+ let NumMicroOps = 0; }
+
+def M4WriteA1 : SchedWriteRes<[M4UnitALU]> { let Latency = 1; }
+def M4WriteA2 : SchedWriteRes<[M4UnitALU]> { let Latency = 2; }
+def M4WriteAA : SchedWriteRes<[M4UnitALU]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def M4WriteAB : SchedWriteRes<[M4UnitALU,
+ M4UnitC]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteAC : SchedWriteRes<[M4UnitALU,
+ M4UnitALU,
+ M4UnitC]> { let Latency = 3;
+ let NumMicroOps = 3; }
+def M4WriteAD : SchedWriteRes<[M4UnitALU,
+ M4UnitC]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteAF : SchedWriteRes<[M4UnitALU]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M4WriteZ0]>,
+ SchedVar<ExynosArithPred, [M4WriteA1]>,
+ SchedVar<ExynosLogicExPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAV : SchedWriteVariant<[SchedVar<ExynosResetPred, [M4WriteZ0]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M4WriteA1]>,
+ SchedVar<ExynosLogicExPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAF]>]>;
+
+def M4WriteB1 : SchedWriteRes<[M4UnitB]> { let Latency = 1; }
+def M4WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M4WriteAC]>,
+ SchedVar<NoSchedPred, [M4WriteAB]>]>;
+
+def M4WriteC1 : SchedWriteRes<[M4UnitC]> { let Latency = 1; }
+def M4WriteC3 : SchedWriteRes<[M4UnitC]> { let Latency = 3; }
+def M4WriteCA : SchedWriteRes<[M4UnitC]> { let Latency = 4;
+ let ResourceCycles = [2]; }
+
+def M4WriteD12 : SchedWriteRes<[M4UnitD]> { let Latency = 12; }
+def M4WriteD21 : SchedWriteRes<[M4UnitD]> { let Latency = 21; }
+
+def M4WriteE2 : SchedWriteRes<[M4UnitE]> { let Latency = 2; }
+
+def M4WriteL4 : SchedWriteRes<[M4UnitL]> { let Latency = 4; }
+def M4WriteL5 : SchedWriteRes<[M4UnitL]> { let Latency = 5; }
+def M4WriteLA : SchedWriteRes<[M4UnitL,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 1; }
+def M4WriteLB : SchedWriteRes<[M4UnitA,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteLC : SchedWriteRes<[M4UnitA,
+ M4UnitL,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteLD : SchedWriteRes<[M4UnitA,
+ M4UnitL]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M4WriteLE : SchedWriteRes<[M4UnitA,
+ M4UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M4WriteLH : SchedWriteRes<[]> { let Latency = 5;
+ let NumMicroOps = 0; }
+def M4WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M4WriteL5]>,
+ SchedVar<NoSchedPred, [M4WriteL4]>]>;
+
+def M4WriteS1 : SchedWriteRes<[M4UnitS]> { let Latency = 1; }
+def M4WriteSA : SchedWriteRes<[M4UnitS0]> { let Latency = 3; }
+def M4WriteSB : SchedWriteRes<[M4UnitA,
+ M4UnitS]> { let Latency = 2;
+ let NumMicroOps = 1; }
+def M4WriteSX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M4WriteSB]>,
+ SchedVar<NoSchedPred, [M4WriteS1]>]>;
+
+def M4ReadAdrBase : SchedReadVariant<[SchedVar<
+ MCSchedPredicate<
+ CheckAny<
+ [ScaledIdxFn,
+ ExynosScaledIdxFn]>>, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
+
+def M4WriteNEONA : SchedWriteRes<[M4UnitNSHF,
+ M4UnitFADD]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M4WriteNEONB : SchedWriteRes<[M4UnitNALU,
+ M4UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEOND : SchedWriteRes<[M4UnitNSHF,
+ M4UnitFST]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M4WriteNEONH : SchedWriteRes<[M4UnitNALU,
+ M4UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEONI : SchedWriteRes<[M4UnitNSHF,
+ M4UnitS0]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteNEONJ : SchedWriteRes<[M4UnitNMSC,
+ M4UnitS0]> { let Latency = 4; }
+def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNMSC,
+ M4UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
+def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
+def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC,
+ M4UnitNMSC]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEONO : SchedWriteRes<[M4UnitNMSC,
+ M4UnitNMSC,
+ M4UnitNMSC]> { let Latency = 8;
+ let NumMicroOps = 3; }
+def M4WriteNEONP : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNMSC]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M4WriteNEONQ : SchedWriteRes<[M4UnitNMSC,
+ M4UnitC]> { let Latency = 3;
+ let NumMicroOps = 1; }
+def M4WriteNEONR : SchedWriteRes<[M4UnitFCVT0,
+ M4UnitS0]> { let Latency = 4;
+ let NumMicroOps = 1; }
+def M4WriteNEONV : SchedWriteRes<[M4UnitFDIV,
+ M4UnitFDIV]> { let Latency = 7;
+ let ResourceCycles = [6, 6]; }
+def M4WriteNEONVH : SchedWriteRes<[M4UnitFDIVH,
+ M4UnitFDIVH]> { let Latency = 7;
+ let ResourceCycles = [6, 6]; }
+def M4WriteNEONW : SchedWriteRes<[M4UnitFDIV,
+ M4UnitFDIV]> { let Latency = 12;
+ let ResourceCycles = [9, 9]; }
+def M4WriteNEONX : SchedWriteRes<[M4UnitFSQR,
+ M4UnitFSQR]> { let Latency = 8;
+ let ResourceCycles = [7, 7]; }
+def M4WriteNEONXH : SchedWriteRes<[M4UnitFSQRH,
+ M4UnitFSQRH]> { let Latency = 7;
+ let ResourceCycles = [6, 6]; }
+def M4WriteNEONY : SchedWriteRes<[M4UnitFSQR,
+ M4UnitFSQR]> { let Latency = 12;
+ let ResourceCycles = [9, 9]; }
+def M4WriteNEONZ : SchedWriteVariant<[SchedVar<ExynosQFormPred, [M4WriteNEONO]>,
+ SchedVar<NoSchedPred, [M4WriteNEONN]>]>;
+
+def M4WriteFADD2 : SchedWriteRes<[M4UnitFADD]> { let Latency = 2; }
+def M4WriteFADD2H : SchedWriteRes<[M4UnitFADDH]> { let Latency = 2; }
+
+def M4WriteFCVT2 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 2; }
+def M4WriteFCVT2A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 2; }
+def M4WriteFCVT2H : SchedWriteRes<[M4UnitFCVTH]> { let Latency = 2; }
+def M4WriteFCVT3 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 3; }
+def M4WriteFCVT3A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 3; }
+def M4WriteFCVT3H : SchedWriteRes<[M4UnitFCVTH]> { let Latency = 3; }
+def M4WriteFCVT4 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 4; }
+def M4WriteFCVT4A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 4; }
+def M4WriteFCVT6A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 6; }
+
+def M4WriteFDIV7 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 7;
+ let ResourceCycles = [6]; }
+def M4WriteFDIV7H : SchedWriteRes<[M4UnitFDIVH]> { let Latency = 7;
+ let ResourceCycles = [6]; }
+def M4WriteFDIV12 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+
+def M4WriteFMAC2H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 2; }
+def M4WriteFMAC3H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 3; }
+def M4WriteFMAC3 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 3; }
+def M4WriteFMAC4 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 4; }
+def M4WriteFMAC4H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 4; }
+def M4WriteFMAC5 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 5; }
+
+def M4WriteFSQR7H : SchedWriteRes<[M4UnitFSQRH]> { let Latency = 7;
+ let ResourceCycles = [6]; }
+def M4WriteFSQR8 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 8;
+ let ResourceCycles = [7]; }
+def M4WriteFSQR12 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+
+def M4WriteNALU1 : SchedWriteRes<[M4UnitNALU]> { let Latency = 1; }
+def M4WriteNALU1H : SchedWriteRes<[M4UnitNALUH]> { let Latency = 1; }
+
+def M4WriteNCRY1 : SchedWriteRes<[M4UnitNCRY]> { let Latency = 1; }
+def M4WriteNCRY1A : SchedWriteRes<[M4UnitNCRY0]> { let Latency = 1; }
+def M4WriteNCRY3A : SchedWriteRes<[M4UnitNCRY0]> { let Latency = 3; }
+def M4WriteNCRY5A : SchedWriteRes<[M4UnitNCRY]> { let Latency = 5; }
+
+def M4WriteNHAD1 : SchedWriteRes<[M4UnitNHAD]> { let Latency = 1; }
+def M4WriteNHAD3 : SchedWriteRes<[M4UnitNHAD]> { let Latency = 3; }
+
+def M4WriteNMSC1 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 1; }
+def M4WriteNMSC2 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 2; }
+def M4WriteNMSC3 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 3; }
+
+def M4WriteNMUL3 : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
+
+def M4WriteNSHF1 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1; }
+def M4WriteNSHF1H : SchedWriteRes<[M4UnitNSHFH]> { let Latency = 1; }
+def M4WriteNSHF3 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3; }
+def M4WriteNSHFA : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1;
+ let ResourceCycles = [2]; }
+def M4WriteNSHFB : SchedWriteRes<[M4UnitNSHF]> { let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2]; }
+def M4WriteNSHFC : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [4]; }
+def M4WriteNSHFD : SchedWriteRes<[M4UnitNSHF]> { let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4]; }
+
+def M4WriteNSHT1 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 1; }
+def M4WriteNSHT2 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 2; }
+def M4WriteNSHT3 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 3; }
+def M4WriteNSHT4A : SchedWriteRes<[M4UnitNSHT1]> { let Latency = 4; }
+
+def M4WriteVLDA : SchedWriteRes<[M4UnitL,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteVLDB : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 6;
+ let NumMicroOps = 3; }
+def M4WriteVLDC : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 6;
+ let NumMicroOps = 4; }
+def M4WriteVLDD : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF]> { let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 1]; }
+def M4WriteVLDF : SchedWriteRes<[M4UnitL,
+ M4UnitL]> { let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [3, 3]; }
+def M4WriteVLDG : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1, 1]; }
+def M4WriteVLDI : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3, 3, 3]; }
+def M4WriteVLDJ : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3, 1, 1, 1]; }
+def M4WriteVLDK : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [3, 1, 1, 1, 1]; }
+def M4WriteVLDL : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitL,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [3, 1, 1, 6, 1]; }
+def M4WriteVLDM : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 6;
+ let ResourceCycles = [3, 1, 1, 3, 1, 1]; }
+def M4WriteVLDN : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3, 3, 3, 3]; }
+
+def M4WriteVST1 : SchedWriteRes<[M4UnitS,
+ M4UnitFST]> { let Latency = 1;
+ let NumMicroOps = 1; }
+def M4WriteVSTA : WriteSequence<[WriteVST], 2>;
+def M4WriteVSTB : WriteSequence<[WriteVST], 3>;
+def M4WriteVSTC : WriteSequence<[WriteVST], 4>;
+def M4WriteVSTD : SchedWriteRes<[M4UnitS,
+ M4UnitFST]> { let Latency = 2; }
+def M4WriteVSTE : SchedWriteRes<[M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteVSTF : SchedWriteRes<[M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 2, 1, 2, 1]; }
+def M4WriteVSTG : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 5;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1, 1, 1, 2, 1, 2, 1, 2, 1]; }
+def M4WriteVSTI : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; }
+def M4WriteVSTJ : SchedWriteRes<[M4UnitA,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M4WriteVSTK : SchedWriteRes<[M4UnitA,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M4WriteVSTL : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 2, 1, 2, 1]; }
+
+// Special cases.
+def M4WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>,
+ SchedVar<NoSchedPred, [M4WriteZ0]>]>;
+def M4WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M4WriteZ0]>,
+ SchedVar<NoSchedPred, [M4WriteNALU1]>]>;
+def M4WriteMULL : SchedWriteVariant<[SchedVar<ExynosLongVectorUpperPred, [M4WriteNEONM]>,
+ SchedVar<NoSchedPred, [M4WriteNMUL3]>]>;
+
+// Fast forwarding.
+def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>;
+def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4,
+ M4WriteFMAC4H,
+ M4WriteFMAC5]>;
+def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>;
+def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>;
+
+//===----------------------------------------------------------------------===//
+// Coarse scheduling model.
+
+// Branch instructions.
+def : SchedAlias<WriteBr, M4WriteZ0>;
+def : SchedAlias<WriteBrReg, M4WriteC1>;
+
+// Arithmetic and logical integer instructions.
+def : SchedAlias<WriteI, M4WriteA1>;
+def : SchedAlias<WriteIEReg, M4WriteAA>; // FIXME: M4WriteAX crashes TableGen.
+def : SchedAlias<WriteISReg, M4WriteAA>; // FIXME: M4WriteAX crashes TableGen.
+def : SchedAlias<WriteIS, M4WriteA1>;
+
+// Move instructions.
+def : SchedAlias<WriteImm, M4WriteA1>;
+
+// Divide and multiply instructions.
+def : SchedAlias<WriteID32, M4WriteD12>;
+def : SchedAlias<WriteID64, M4WriteD21>;
+def : SchedAlias<WriteIM32, M4WriteC3>;
+def : SchedAlias<WriteIM64, M4WriteCA>;
+
+// Miscellaneous instructions.
+def : SchedAlias<WriteExtr, M4WriteAY>;
+
+// Addressing modes.
+def : SchedAlias<WriteAdr, M4WriteZ1>;
+def : SchedAlias<ReadAdrBase, M4ReadAdrBase>;
+
+// Load instructions.
+def : SchedAlias<WriteLD, M4WriteL4>;
+def : SchedAlias<WriteLDHi, M4WriteZ4>;
+def : SchedAlias<WriteLDIdx, M4WriteLX>;
+
+// Store instructions.
+def : SchedAlias<WriteST, M4WriteS1>;
+def : SchedAlias<WriteSTP, M4WriteS1>;
+def : SchedAlias<WriteSTX, M4WriteS1>;
+def : SchedAlias<WriteSTIdx, M4WriteSX>;
+
+// FP data instructions.
+def : SchedAlias<WriteF, M4WriteFADD2>;
+def : SchedAlias<WriteFCmp, M4WriteNMSC2>;
+def : SchedAlias<WriteFDiv, M4WriteFDIV12>;
+def : SchedAlias<WriteFMul, M4WriteFMAC3>;
+
+// FP miscellaneous instructions.
+def : SchedAlias<WriteFCvt, M4WriteFCVT2>;
+def : SchedAlias<WriteFImm, M4WriteNALU1>;
+def : SchedAlias<WriteFCopy, M4WriteCOPY>;
+
+// FP load instructions.
+def : SchedAlias<WriteVLD, M4WriteL5>;
+
+// FP store instructions.
+def : SchedAlias<WriteVST, M4WriteVST1>;
+
+// ASIMD FP instructions.
+def : SchedAlias<WriteV, M4WriteNALU1>;
+
+// Other miscellaneous instructions.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Generic fast forwarding.
+
+// TODO: Add FP register forwarding rules.
+
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+// TODO: The forwarding for 32 bits actually saves 2 cycles.
+def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+//===----------------------------------------------------------------------===//
+// Finer scheduling model.
+
+// Branch instructions
+def : InstRW<[M4WriteB1], (instrs Bcc)>;
+def : InstRW<[M4WriteAF], (instrs BL)>;
+def : InstRW<[M4WriteBX], (instrs BLR)>;
+def : InstRW<[M4WriteC1], (instregex "^CBN?Z[WX]")>;
+def : InstRW<[M4WriteAD], (instregex "^TBN?Z[WX]")>;
+
+// Arithmetic and logical integer instructions.
+def : InstRW<[M4WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
+def : InstRW<[M4WriteAU], (instrs ORRWrs, ORRXrs)>;
+def : InstRW<[M4WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
+def : InstRW<[M4WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
+def : InstRW<[M4WriteAV], (instrs ADDWri, ADDXri, ORRWri, ORRXri)>;
+
+// Move instructions.
+def : InstRW<[M4WriteCOPY], (instrs COPY)>;
+def : InstRW<[M4WriteZ0], (instrs ADR, ADRP)>;
+def : InstRW<[M4WriteZ0], (instregex "^MOV[NZ][WX]i")>;
+
+// Divide and multiply instructions.
+
+// Miscellaneous instructions.
+
+// Load instructions.
+def : InstRW<[M4WriteLD,
+ WriteLDHi,
+ WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M4WriteL5,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
+def : InstRW<[WriteLDIdx,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M4WriteL5,
+ ReadAdrBase], (instrs PRFMroW)>;
+def : InstRW<[WriteLDIdx,
+ ReadAdrBase], (instrs PRFMroX)>;
+
+// Store instructions.
+def : InstRW<[M4WriteSB,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
+
+// FP data instructions.
+def : InstRW<[M4WriteNSHF1H], (instrs FABSHr)>;
+def : InstRW<[M4WriteNSHF1], (instregex "^FABS[SD]r")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^F(ADD|SUB)Hrr")>;
+def : InstRW<[M4WriteFADD2], (instregex "^F(ADD|SUB)[SD]rr")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^FADDPv.i16")>;
+def : InstRW<[M4WriteFADD2], (instregex "^FADDPv.i(32|64)")>;
+def : InstRW<[M4WriteNEONQ], (instregex "^FCCMPE?[HSD]rr")>;
+def : InstRW<[M4WriteNMSC2], (instregex "^FCMPE?[HSD]r[ir]")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(16|32|64|v1)")>;
+def : InstRW<[M4WriteFDIV7H], (instrs FDIVHrr)>;
+def : InstRW<[M4WriteFDIV7], (instrs FDIVSrr)>;
+def : InstRW<[M4WriteFDIV12], (instrs FDIVDrr)>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?[HSD]rr")>;
+def : InstRW<[M4WriteFMAC3H], (instregex "^FN?MULHrr")>;
+def : InstRW<[M4WriteFMAC3], (instregex "^FN?MUL[SD]rr")>;
+def : InstRW<[M4WriteFMAC3H], (instrs FMULX16)>;
+def : InstRW<[M4WriteFMAC3], (instregex "^FMULX(32|64)")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^FN?M(ADD|SUB)Hrrr")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
+def : InstRW<[M4WriteNALU1H], (instrs FNEGHr)>;
+def : InstRW<[M4WriteNALU1], (instregex "^FNEG[SD]r")>;
+def : InstRW<[M4WriteFCVT3A], (instregex "^FRINT.+r")>;
+def : InstRW<[M4WriteNEONH], (instregex "^FCSEL[HSD]rrr")>;
+def : InstRW<[M4WriteFSQR7H], (instrs FSQRTHr)>;
+def : InstRW<[M4WriteFSQR8], (instrs FSQRTSr)>;
+def : InstRW<[M4WriteFSQR12], (instrs FSQRTDr)>;
+
+// FP miscellaneous instructions.
+def : InstRW<[M4WriteFCVT2H], (instregex "^FCVTH[SD]r")>;
+def : InstRW<[M4WriteFCVT2H], (instregex "^FCVT[SD]Hr")>;
+def : InstRW<[M4WriteFCVT2], (instregex "^FCVT[SD][SD]r")>;
+def : InstRW<[M4WriteFCVT6A], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>;
+def : InstRW<[M4WriteNEONR], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
+def : InstRW<[M4WriteNALU1], (instregex "^FMOV[HSD][ir]")>;
+def : InstRW<[M4WriteSA], (instregex "^FMOV[WX][HSD]r")>;
+def : InstRW<[M4WriteNEONJ], (instregex "^FMOV[HSD][WX]r")>;
+def : InstRW<[M4WriteNEONI], (instregex "^FMOVXDHighr")>;
+def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>;
+def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>;
+def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>;
+
+// FP load instructions.
+def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>;
+def : InstRW<[WriteVLD], (instregex "^LDUR[BHSDQ]i")>;
+def : InstRW<[WriteVLD,
+ WriteAdr], (instregex "^LDR[BHSDQ](post|pre)")>;
+def : InstRW<[WriteVLD], (instregex "^LDR[BHSDQ]ui")>;
+def : InstRW<[M4WriteLE,
+ ReadAdrBase], (instregex "^LDR[BHSDQ]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BHSD]roX")>;
+def : InstRW<[M4WriteLE,
+ ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[WriteVLD,
+ M4WriteLH], (instregex "^LDN?P[SD]i")>;
+def : InstRW<[M4WriteLA,
+ M4WriteLH], (instregex "^LDN?PQi")>;
+def : InstRW<[M4WriteL5,
+ M4WriteLH,
+ WriteAdr], (instregex "^LDP[SD]post")>;
+def : InstRW<[M4WriteLB,
+ M4WriteLH,
+ WriteAdr], (instrs LDPQpost)>;
+def : InstRW<[M4WriteLB,
+ M4WriteLH,
+ WriteAdr], (instregex "^LDP[SD]pre")>;
+def : InstRW<[M4WriteLC,
+ M4WriteLH,
+ WriteAdr], (instrs LDPQpre)>;
+
+// FP store instructions.
+def : InstRW<[WriteVST], (instregex "^STUR[BHSDQ]i")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STR[BHSDQ](post|pre)")>;
+def : InstRW<[WriteVST], (instregex "^STR[BHSDQ]ui")>;
+def : InstRW<[M4WriteVSTJ,
+ ReadAdrBase], (instregex "^STR[BHSD]roW")>;
+def : InstRW<[M4WriteVSTK,
+ ReadAdrBase], (instrs STRQroW)>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BHSD]roX")>;
+def : InstRW<[M4WriteVSTK,
+ ReadAdrBase], (instrs STRQroX)>;
+def : InstRW<[WriteVST], (instregex "^STN?P[SD]i")>;
+def : InstRW<[M4WriteVSTA], (instregex "^STN?PQi")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STP[SD](post|pre)")>;
+def : InstRW<[M4WriteVSTJ,
+ WriteAdr], (instregex "^STPQ(post|pre)")>;
+
+// ASIMD instructions.
+def : InstRW<[M4WriteNHAD1], (instregex "^[SU]ABDL?v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ABAL?v")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^ABSv")>;
+def : InstRW<[M4WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]?ADDL?Pv")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]H(ADD|SUB)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU](ADD|SUB)[LW]v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^R?(ADD|SUB)HN2?v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]Q(ADD|SUB)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^(SU|US)QADDv")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]RHADDv")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^SQ(ABS|NEG)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]?ADDL?Vv")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
+def : InstRW<[M4WriteNALU1], (instregex "^CMTSTv")>;
+def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>;
+def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>;
+def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^ML[AS]v")>;
+def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>;
+def : InstRW<[M4WriteMULL,
+ M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>;
+def : InstRW<[M4WriteMULL,
+ M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
+def : InstRW<[M4WriteNSHT1], (instregex "^SHL[dv]")>;
+def : InstRW<[M4WriteNSHT1], (instregex "^S[LR]I[dv]")>;
+def : InstRW<[M4WriteNSHT1], (instregex "^[SU]SH[LR][dv]")>;
+def : InstRW<[M4WriteNSHT2], (instregex "^[SU]?SHLLv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]?Q?R?SHRU?N[bhsv]")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]RSH[LR][dv]")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]QR?SHLU?[bhsdv]")>;
+
+// ASIMD FP instructions.
+def : InstRW<[M4WriteNSHF1H], (instregex "^FABSv.f16")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^FABSv.f(32|64)")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^F(ABD|ADD|SUB)v.f16")>;
+def : InstRW<[M4WriteFADD2], (instregex "^F(ABD|ADD|SUB)v.f(32|64)")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^FADDPv.f16")>;
+def : InstRW<[M4WriteFADD2], (instregex "^FADDPv.f(32|64)")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
+def : InstRW<[M4WriteFCVT2], (instregex "^FCVT(L|N|XN)v")>;
+def : InstRW<[M4WriteFCVT2A], (instregex "^FCVT[AMNPZ][SU]v")>;
+def : InstRW<[M4WriteFCVT2H], (instregex "^[SU]CVTFv.[fi]16")>;
+def : InstRW<[M4WriteFCVT2], (instregex "^[SU]CVTFv.[fi](32|64)")>;
+def : InstRW<[M4WriteFDIV7H], (instrs FDIVv4f16)>;
+def : InstRW<[M4WriteNEONVH], (instrs FDIVv8f16)>;
+def : InstRW<[M4WriteFDIV7], (instrs FDIVv2f32)>;
+def : InstRW<[M4WriteNEONV], (instrs FDIVv4f32)>;
+def : InstRW<[M4WriteNEONW], (instrs FDIVv2f64)>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>;
+def : InstRW<[M4WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
+def : InstRW<[M4WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>;
+def : InstRW<[M4WriteFMAC2H], (instregex "^FMULX?v.[fi]16")>;
+def : InstRW<[M4WriteFMAC3], (instregex "^FMULX?v.[fi](32|64)")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^FML[AS]v.[fi]16")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^FML[AS]v.[fi](32|64)")>;
+def : InstRW<[M4WriteNALU1H], (instregex "^FNEGv.f16")>;
+def : InstRW<[M4WriteNALU1], (instregex "^FNEGv.f(32|64)")>;
+def : InstRW<[M4WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>;
+def : InstRW<[M4WriteFSQR7H], (instrs FSQRTv4f16)>;
+def : InstRW<[M4WriteNEONXH], (instrs FSQRTv8f16)>;
+def : InstRW<[M4WriteFSQR8], (instrs FSQRTv2f32)>;
+def : InstRW<[M4WriteNEONX], (instrs FSQRTv4f32)>;
+def : InstRW<[M4WriteNEONY], (instrs FSQRTv2f64)>;
+
+// ASIMD miscellaneous instructions.
+def : InstRW<[M4WriteNALU1], (instregex "^RBITv")>;
+def : InstRW<[M4WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M4WriteNALU1], (instregex "^CL[STZ]v")>;
+def : InstRW<[M4WriteNEONB], (instregex "^DUPv.+gpr")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^CPY")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^DUPv.+lane")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^EXTv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^XTNv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]?QXTU?Nv")>;
+def : InstRW<[M4WriteNEONB], (instregex "^INSv.+gpr")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^INSv.+lane")>;
+def : InstRW<[M4WriteMOVI], (instregex "^(MOV|MVN)I")>;
+def : InstRW<[M4WriteNALU1H], (instregex "^FMOVv.f16")>;
+def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>;
+def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>;
+def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>;
+def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>;
+def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>;
+def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>;
+def : InstRW<[M4WriteNSHFC], (instregex "^TB[LX]v(8|16)i8Three")>;
+def : InstRW<[M4WriteNSHFD], (instregex "^TB[LX]v(8|16)i8Four")>;
+def : InstRW<[M4WriteNEONP], (instregex "^[SU]MOVv")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
+
+// ASIMD load instructions.
+def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDD], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVLDD,
+ M4WriteA1], (instregex "LD1i(8|16|32|64)_POST$")>;
+
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVLDF,
+ M4WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDF,
+ M4WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDG], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVLDG,
+ M4WriteA1], (instregex "LD2i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVLDI,
+ M4WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDI,
+ M4WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[M4WriteVLDJ,
+ M4WriteA1], (instregex "LD3i(8|16|32)_POST$")>;
+def : InstRW<[M4WriteVLDL], (instregex "LD3i64$")>;
+def : InstRW<[M4WriteVLDL,
+ M4WriteA1], (instregex "LD3i64_POST$")>;
+
+def : InstRW<[M4WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVLDN,
+ M4WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDN,
+ M4WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDK], (instregex "LD4i(8|16|32)$")>;
+def : InstRW<[M4WriteVLDK,
+ M4WriteA1], (instregex "LD4i(8|16|32)_POST$")>;
+def : InstRW<[M4WriteVLDM], (instregex "LD4i64$")>;
+def : InstRW<[M4WriteVLDM,
+ M4WriteA1], (instregex "LD4i64_POST$")>;
+
+def : InstRW<[M4WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store instructions.
+def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVST,
+ M4WriteA1], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVST,
+ M4WriteA1], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVSTA,
+ M4WriteA1], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTA,
+ M4WriteA1], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVSTB,
+ M4WriteA1], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTB,
+ M4WriteA1], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVSTC,
+ M4WriteA1], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTC,
+ M4WriteA1], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[WriteVST,
+ M4WriteA1], (instregex "ST1i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVSTD,
+ M4WriteA1], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTE,
+ M4WriteA1], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTD], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVSTD,
+ M4WriteA1], (instregex "ST2i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVSTF,
+ M4WriteA1], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTG,
+ M4WriteA1], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTE], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVSTE,
+ M4WriteA1], (instregex "ST3i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVSTL], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVSTL,
+ M4WriteA1], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVSTI], (instregex "ST4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTI,
+ M4WriteA1], (instregex "ST4Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTE], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVSTE,
+ M4WriteA1], (instregex "ST4i(8|16|32|64)_POST$")>;
+
+// Cryptography instructions.
+def : InstRW<[M4WriteNCRY1], (instregex "^AES[DE]")>;
+def : InstRW<[M4WriteNCRY1,
+ M4ReadAESM1], (instregex "^AESI?MC")>;
+def : InstRW<[M4WriteNCRY1A], (instregex "^PMULv")>;
+def : InstRW<[M4WriteNCRY1A], (instregex "^PMULLv(1|8)i")>;
+def : InstRW<[M4WriteNCRY3A], (instregex "^PMULLv(2|16)i")>;
+def : InstRW<[M4WriteNCRY1A], (instregex "^SHA1([CHMP]|SU[01])")>;
+def : InstRW<[M4WriteNCRY1A], (instrs SHA256SU0rr)>;
+def : InstRW<[M4WriteNCRY5A], (instrs SHA256SU1rrr)>;
+def : InstRW<[M4WriteNCRY5A], (instrs SHA256H2rrr)>;
+
+// CRC instructions.
+def : InstRW<[M4WriteE2], (instregex "^CRC32C?[BHWX]rr$")>;
+
+} // SchedModel = ExynosM4Model
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
new file mode 100644
index 000000000000..48c54230e9d8
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
@@ -0,0 +1,157 @@
+//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling predicates that are used by the
+// AArch64 Exynos processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Auxiliary predicates.
+
+// Check the shift in arithmetic and logic instructions.
+def ExynosCheckShift : CheckAny<[CheckShiftBy0,
+ CheckAll<
+ [CheckShiftLSL,
+ CheckAny<
+ [CheckShiftBy1,
+ CheckShiftBy2,
+ CheckShiftBy3]>]>]>;
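+// For example, "ADD x0, x1, x2, LSL #2" (LSL by 1-3) and the unshifted forms
+// satisfy this check, whereas "ADD x0, x1, x2, LSL #4" and
+// "ADD x0, x1, x2, ASR #1" do not.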
+
+// Exynos predicates.
+
+// Identify BLR specifying the LR register as the indirect target register.
+def ExynosBranchLinkLRPred : MCSchedPredicate<
+ CheckAll<[CheckOpcode<[BLR]>,
+ CheckRegOperand<0, LR>]>>;
+
+// Identify arithmetic instructions with no or only a limited extension or shift.
+def ExynosArithFn : TIIPredicate<
+ "isExynosArithFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithExtOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<[CheckExtBy0,
+ CheckAll<
+ [CheckAny<
+ [CheckExtUXTW,
+ CheckExtUXTX]>,
+ CheckAny<
+ [CheckExtBy1,
+ CheckExtBy2,
+ CheckExtBy3]>]>]>>>,
+ MCOpcodeSwitchCase<
+ IsArithShiftOp.ValidOpcodes,
+ MCReturnStatement<ExynosCheckShift>>,
+ MCOpcodeSwitchCase<
+ IsArithUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosArithPred : MCSchedPredicate<ExynosArithFn>;
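+// For example, among the extended forms "ADD x0, x1, w2, UXTW #2" is
+// considered fast (UXTW or UXTX extension by at most 3), whereas
+// "ADD x0, x1, w2, SXTW #2" is not; unshifted forms always qualify.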
+
+// Identify logic instructions with limited shift.
+def ExynosLogicFn : TIIPredicate<
+ "isExynosLogicFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<ExynosCheckShift>>,
+ MCOpcodeSwitchCase<
+ IsLogicUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosLogicPred : MCSchedPredicate<ExynosLogicFn>;
+
+// Identify logic instructions with limited shift, additionally allowing LSL #8.
+def ExynosLogicExFn : TIIPredicate<
+ "isExynosLogicExFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<
+ [ExynosCheckShift,
+ CheckAll<
+ [CheckShiftLSL,
+ CheckShiftBy8]>]>>>,
+ MCOpcodeSwitchCase<
+ IsLogicUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosLogicExPred : MCSchedPredicate<ExynosLogicExFn>;
+
+// Identify a load or store using the register offset addressing mode
+// with an extended (SXTW or UXTW) or scaled register.
+def ExynosScaledIdxFn : TIIPredicate<"isExynosScaledAddr",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLoadStoreRegOffsetOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<
+ [CheckMemExtSXTW,
+ CheckMemExtUXTW,
+ CheckMemScaled]>>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
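+// For example, "LDR x0, [x1, w2, SXTW]" and "LDR x0, [x1, x2, LSL #3]" are
+// matched, while "LDR x0, [x1, x2]" is not.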
+
+// Identify FP instructions.
+def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
+
+// Identify whether an instruction whose result is a long vector
+// operates on the upper half of the input registers.
+def ExynosLongVectorUpperFn : TIIPredicate<
+ "isExynosLongVectorUpper",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLongVectorUpperOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
+
+// Identify 128-bit NEON instructions.
+def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
+
+// Identify instructions that reset a register efficiently.
+def ExynosResetFn : TIIPredicate<
+ "isExynosResetFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ [ADR, ADRP,
+ MOVNWi, MOVNXi,
+ MOVZWi, MOVZXi],
+ MCReturnStatement<TruePred>>,
+ MCOpcodeSwitchCase<
+ [ORRWri, ORRXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<1>,
+ CheckAny<
+ [CheckRegOperand<1, WZR>,
+ CheckRegOperand<1, XZR>]>]>>>],
+ MCReturnStatement<
+ CheckAny<
+ [IsCopyIdiomFn,
+ IsZeroFPIdiomFn]>>>>;
+def ExynosResetPred : MCSchedPredicate<ExynosResetFn>;
+
+// Identify EXTR as the alias for ROR (immediate).
+def ExynosRotateRightImmPred : MCSchedPredicate<
+ CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
+ CheckSameRegOperand<1, 2>]>>;
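+// For example, "EXTR w0, w1, w1, #8" (the alias of "ROR w0, w1, #8") matches,
+// while "EXTR w0, w1, w2, #8" with distinct source registers does not.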
+
+// Identify instructions that are as cheap as a move (arithmetic and logic
+// immediates, plus the fast arithmetic, logic, and reset forms above).
+def ExynosCheapFn : TIIPredicate<
+ "isExynosCheapAsMove",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithLogicImmOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<
+ CheckAny<
+ [ExynosArithFn, ExynosResetFn, ExynosLogicFn]>>>>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
new file mode 100644
index 000000000000..dbaf11fc95dd
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -0,0 +1,423 @@
+//===- AArch64SchedPredicates.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling predicates that are used by the
+// AArch64 subtargets.
+//
+//===----------------------------------------------------------------------===//
+
+// Function mappers.
+
+// Check the extension type in arithmetic instructions.
+let FunctionMapper = "AArch64_AM::getArithExtendType" in {
+ def CheckExtUXTB : CheckImmOperand_s<3, "AArch64_AM::UXTB">;
+ def CheckExtUXTH : CheckImmOperand_s<3, "AArch64_AM::UXTH">;
+ def CheckExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
+ def CheckExtUXTX : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
+ def CheckExtSXTB : CheckImmOperand_s<3, "AArch64_AM::SXTB">;
+ def CheckExtSXTH : CheckImmOperand_s<3, "AArch64_AM::SXTH">;
+ def CheckExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
+ def CheckExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
+}
+
+// Check for shifting in extended arithmetic instructions.
+foreach I = {0-3} in {
+ let FunctionMapper = "AArch64_AM::getArithShiftValue" in
+ def CheckExtBy#I : CheckImmOperand<3, I>;
+}
+
+// Check the extension type in the register offset addressing mode.
+let FunctionMapper = "AArch64_AM::getMemExtendType" in {
+ def CheckMemExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
+ def CheckMemExtLSL : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
+ def CheckMemExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
+ def CheckMemExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
+}
+
+// Check for scaling in the register offset addressing mode.
+let FunctionMapper = "AArch64_AM::getMemDoShift" in
+def CheckMemScaled : CheckImmOperandSimple<3>;
+
+// Check the shifting type in arithmetic and logic instructions.
+let FunctionMapper = "AArch64_AM::getShiftType" in {
+ def CheckShiftLSL : CheckImmOperand_s<3, "AArch64_AM::LSL">;
+ def CheckShiftLSR : CheckImmOperand_s<3, "AArch64_AM::LSR">;
+ def CheckShiftASR : CheckImmOperand_s<3, "AArch64_AM::ASR">;
+ def CheckShiftROR : CheckImmOperand_s<3, "AArch64_AM::ROR">;
+ def CheckShiftMSL : CheckImmOperand_s<3, "AArch64_AM::MSL">;
+}
+
+// Check for shifting in arithmetic and logic instructions.
+foreach I = {0-3, 8} in {
+ let FunctionMapper = "AArch64_AM::getShiftValue" in
+ def CheckShiftBy#I : CheckImmOperand<3, I>;
+}
+
+// Generic predicates.
+
+// Identify whether an instruction is the 64-bit NEON form based on its result.
+def CheckDForm : CheckAll<[CheckIsRegOperand<0>,
+ CheckAny<[CheckRegOperand<0, D0>,
+ CheckRegOperand<0, D1>,
+ CheckRegOperand<0, D2>,
+ CheckRegOperand<0, D3>,
+ CheckRegOperand<0, D4>,
+ CheckRegOperand<0, D5>,
+ CheckRegOperand<0, D6>,
+ CheckRegOperand<0, D7>,
+ CheckRegOperand<0, D8>,
+ CheckRegOperand<0, D9>,
+ CheckRegOperand<0, D10>,
+ CheckRegOperand<0, D11>,
+ CheckRegOperand<0, D12>,
+ CheckRegOperand<0, D13>,
+ CheckRegOperand<0, D14>,
+ CheckRegOperand<0, D15>,
+ CheckRegOperand<0, D16>,
+ CheckRegOperand<0, D17>,
+ CheckRegOperand<0, D18>,
+ CheckRegOperand<0, D19>,
+ CheckRegOperand<0, D20>,
+ CheckRegOperand<0, D21>,
+ CheckRegOperand<0, D22>,
+ CheckRegOperand<0, D23>,
+ CheckRegOperand<0, D24>,
+ CheckRegOperand<0, D25>,
+ CheckRegOperand<0, D26>,
+ CheckRegOperand<0, D27>,
+ CheckRegOperand<0, D28>,
+ CheckRegOperand<0, D29>,
+ CheckRegOperand<0, D30>,
+ CheckRegOperand<0, D31>]>]>;
+
+// Identify whether an instruction is the 128-bit NEON form based on its result.
+def CheckQForm : CheckAll<[CheckIsRegOperand<0>,
+ CheckAny<[CheckRegOperand<0, Q0>,
+ CheckRegOperand<0, Q1>,
+ CheckRegOperand<0, Q2>,
+ CheckRegOperand<0, Q3>,
+ CheckRegOperand<0, Q4>,
+ CheckRegOperand<0, Q5>,
+ CheckRegOperand<0, Q6>,
+ CheckRegOperand<0, Q7>,
+ CheckRegOperand<0, Q8>,
+ CheckRegOperand<0, Q9>,
+ CheckRegOperand<0, Q10>,
+ CheckRegOperand<0, Q11>,
+ CheckRegOperand<0, Q12>,
+ CheckRegOperand<0, Q13>,
+ CheckRegOperand<0, Q14>,
+ CheckRegOperand<0, Q15>,
+ CheckRegOperand<0, Q16>,
+ CheckRegOperand<0, Q17>,
+ CheckRegOperand<0, Q18>,
+ CheckRegOperand<0, Q19>,
+ CheckRegOperand<0, Q20>,
+ CheckRegOperand<0, Q21>,
+ CheckRegOperand<0, Q22>,
+ CheckRegOperand<0, Q23>,
+ CheckRegOperand<0, Q24>,
+ CheckRegOperand<0, Q25>,
+ CheckRegOperand<0, Q26>,
+ CheckRegOperand<0, Q27>,
+ CheckRegOperand<0, Q28>,
+ CheckRegOperand<0, Q29>,
+ CheckRegOperand<0, Q30>,
+ CheckRegOperand<0, Q31>]>]>;
+
+// Identify arithmetic instructions with extend.
+def IsArithExtOp : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx,
+ SUBWrx, SUBXrx, SUBSWrx, SUBSXrx,
+ ADDXrx64, ADDSXrx64,
+ SUBXrx64, SUBSXrx64]>;
+
+// Identify arithmetic immediate instructions.
+def IsArithImmOp : CheckOpcode<[ADDWri, ADDXri, ADDSWri, ADDSXri,
+ SUBWri, SUBXri, SUBSWri, SUBSXri]>;
+
+// Identify arithmetic instructions with shift.
+def IsArithShiftOp : CheckOpcode<[ADDWrs, ADDXrs, ADDSWrs, ADDSXrs,
+ SUBWrs, SUBXrs, SUBSWrs, SUBSXrs]>;
+
+// Identify arithmetic instructions without shift.
+def IsArithUnshiftOp : CheckOpcode<[ADDWrr, ADDXrr, ADDSWrr, ADDSXrr,
+ SUBWrr, SUBXrr, SUBSWrr, SUBSXrr]>;
+
+// Identify logic immediate instructions.
+def IsLogicImmOp : CheckOpcode<[ANDWri, ANDXri,
+ EORWri, EORXri,
+ ORRWri, ORRXri]>;
+
+// Identify logic instructions with shift.
+def IsLogicShiftOp : CheckOpcode<[ANDWrs, ANDXrs, ANDSWrs, ANDSXrs,
+ BICWrs, BICXrs, BICSWrs, BICSXrs,
+ EONWrs, EONXrs,
+ EORWrs, EORXrs,
+ ORNWrs, ORNXrs,
+ ORRWrs, ORRXrs]>;
+
+// Identify logic instructions without shift.
+def IsLogicUnshiftOp : CheckOpcode<[ANDWrr, ANDXrr, ANDSWrr, ANDSXrr,
+ BICWrr, BICXrr, BICSWrr, BICSXrr,
+ EONWrr, EONXrr,
+ EORWrr, EORXrr,
+ ORNWrr, ORNXrr,
+ ORRWrr, ORRXrr]>;
+
+// Identify arithmetic and logic immediate instructions.
+def IsArithLogicImmOp : CheckOpcode<!listconcat(IsArithImmOp.ValidOpcodes,
+ IsLogicImmOp.ValidOpcodes)>;
+
+// Identify arithmetic and logic instructions with shift.
+def IsArithLogicShiftOp : CheckOpcode<!listconcat(IsArithShiftOp.ValidOpcodes,
+ IsLogicShiftOp.ValidOpcodes)>;
+
+// Identify arithmetic and logic instructions without shift.
+def IsArithLogicUnshiftOp : CheckOpcode<!listconcat(IsArithUnshiftOp.ValidOpcodes,
+ IsLogicUnshiftOp.ValidOpcodes)>;
+
+// Identify whether an instruction is an ASIMD
+// load using the post index addressing mode.
+def IsLoadASIMDPostOp : CheckOpcode<[LD1Onev8b_POST, LD1Onev4h_POST, LD1Onev2s_POST, LD1Onev1d_POST,
+ LD1Onev16b_POST, LD1Onev8h_POST, LD1Onev4s_POST, LD1Onev2d_POST,
+ LD1Twov8b_POST, LD1Twov4h_POST, LD1Twov2s_POST, LD1Twov1d_POST,
+ LD1Twov16b_POST, LD1Twov8h_POST, LD1Twov4s_POST, LD1Twov2d_POST,
+ LD1Threev8b_POST, LD1Threev4h_POST, LD1Threev2s_POST, LD1Threev1d_POST,
+ LD1Threev16b_POST, LD1Threev8h_POST, LD1Threev4s_POST, LD1Threev2d_POST,
+ LD1Fourv8b_POST, LD1Fourv4h_POST, LD1Fourv2s_POST, LD1Fourv1d_POST,
+ LD1Fourv16b_POST, LD1Fourv8h_POST, LD1Fourv4s_POST, LD1Fourv2d_POST,
+ LD1i8_POST, LD1i16_POST, LD1i32_POST, LD1i64_POST,
+ LD1Rv8b_POST, LD1Rv4h_POST, LD1Rv2s_POST, LD1Rv1d_POST,
+ LD1Rv16b_POST, LD1Rv8h_POST, LD1Rv4s_POST, LD1Rv2d_POST,
+ LD2Twov8b_POST, LD2Twov4h_POST, LD2Twov2s_POST,
+ LD2Twov16b_POST, LD2Twov8h_POST, LD2Twov4s_POST, LD2Twov2d_POST,
+ LD2i8_POST, LD2i16_POST, LD2i32_POST, LD2i64_POST,
+ LD2Rv8b_POST, LD2Rv4h_POST, LD2Rv2s_POST, LD2Rv1d_POST,
+ LD2Rv16b_POST, LD2Rv8h_POST, LD2Rv4s_POST, LD2Rv2d_POST,
+ LD3Threev8b_POST, LD3Threev4h_POST, LD3Threev2s_POST,
+ LD3Threev16b_POST, LD3Threev8h_POST, LD3Threev4s_POST, LD3Threev2d_POST,
+ LD3i8_POST, LD3i16_POST, LD3i32_POST, LD3i64_POST,
+ LD3Rv8b_POST, LD3Rv4h_POST, LD3Rv2s_POST, LD3Rv1d_POST,
+ LD3Rv16b_POST, LD3Rv8h_POST, LD3Rv4s_POST, LD3Rv2d_POST,
+ LD4Fourv8b_POST, LD4Fourv4h_POST, LD4Fourv2s_POST,
+ LD4Fourv16b_POST, LD4Fourv8h_POST, LD4Fourv4s_POST, LD4Fourv2d_POST,
+ LD4i8_POST, LD4i16_POST, LD4i32_POST, LD4i64_POST,
+ LD4Rv8b_POST, LD4Rv4h_POST, LD4Rv2s_POST, LD4Rv1d_POST,
+ LD4Rv16b_POST, LD4Rv8h_POST, LD4Rv4s_POST, LD4Rv2d_POST]>;
+
+// Identify whether an instruction is an ASIMD
+// store using the post index addressing mode.
+def IsStoreASIMDPostOp : CheckOpcode<[ST1Onev8b_POST, ST1Onev4h_POST, ST1Onev2s_POST, ST1Onev1d_POST,
+ ST1Onev16b_POST, ST1Onev8h_POST, ST1Onev4s_POST, ST1Onev2d_POST,
+ ST1Twov8b_POST, ST1Twov4h_POST, ST1Twov2s_POST, ST1Twov1d_POST,
+ ST1Twov16b_POST, ST1Twov8h_POST, ST1Twov4s_POST, ST1Twov2d_POST,
+ ST1Threev8b_POST, ST1Threev4h_POST, ST1Threev2s_POST, ST1Threev1d_POST,
+ ST1Threev16b_POST, ST1Threev8h_POST, ST1Threev4s_POST, ST1Threev2d_POST,
+ ST1Fourv8b_POST, ST1Fourv4h_POST, ST1Fourv2s_POST, ST1Fourv1d_POST,
+ ST1Fourv16b_POST, ST1Fourv8h_POST, ST1Fourv4s_POST, ST1Fourv2d_POST,
+ ST1i8_POST, ST1i16_POST, ST1i32_POST, ST1i64_POST,
+ ST2Twov8b_POST, ST2Twov4h_POST, ST2Twov2s_POST,
+ ST2Twov16b_POST, ST2Twov8h_POST, ST2Twov4s_POST, ST2Twov2d_POST,
+ ST2i8_POST, ST2i16_POST, ST2i32_POST, ST2i64_POST,
+ ST3Threev8b_POST, ST3Threev4h_POST, ST3Threev2s_POST,
+ ST3Threev16b_POST, ST3Threev8h_POST, ST3Threev4s_POST, ST3Threev2d_POST,
+ ST3i8_POST, ST3i16_POST, ST3i32_POST, ST3i64_POST,
+ ST4Fourv8b_POST, ST4Fourv4h_POST, ST4Fourv2s_POST,
+ ST4Fourv16b_POST, ST4Fourv8h_POST, ST4Fourv4s_POST, ST4Fourv2d_POST,
+ ST4i8_POST, ST4i16_POST, ST4i32_POST, ST4i64_POST]>;
+
+// Identify whether an instruction is an ASIMD load
+// or store using the post index addressing mode.
+def IsLoadStoreASIMDPostOp : CheckOpcode<!listconcat(IsLoadASIMDPostOp.ValidOpcodes,
+ IsStoreASIMDPostOp.ValidOpcodes)>;
+
+// Identify whether an instruction is a load
+// using the register offset addressing mode.
+def IsLoadRegOffsetOp : CheckOpcode<[PRFMroW, PRFMroX,
+ LDRBBroW, LDRBBroX,
+ LDRSBWroW, LDRSBWroX, LDRSBXroW, LDRSBXroX,
+ LDRHHroW, LDRHHroX,
+ LDRSHWroW, LDRSHWroX, LDRSHXroW, LDRSHXroX,
+ LDRWroW, LDRWroX,
+ LDRSWroW, LDRSWroX,
+ LDRXroW, LDRXroX,
+ LDRBroW, LDRBroX,
+ LDRHroW, LDRHroX,
+ LDRSroW, LDRSroX,
+ LDRDroW, LDRDroX]>;
+
+// Identify whether an instruction is a store
+// using the register offset addressing mode.
+def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX,
+ STRHHroW, STRHHroX,
+ STRWroW, STRWroX,
+ STRXroW, STRXroX,
+ STRBroW, STRBroX,
+ STRHroW, STRHroX,
+ STRSroW, STRSroX,
+ STRDroW, STRDroX]>;
+
+// Identify whether an instruction is a load or
+// store using the register offset addressing mode.
+def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
+ IsStoreRegOffsetOp.ValidOpcodes)>;
+
+// Identify whether an instruction whose result is a long vector
+// operates on the upper half of the input registers.
+def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32,
+ FCVTNv8i16, FCVTNv4i32,
+ FCVTXNv4f32,
+ PMULLv16i8, PMULLv2i64,
+ RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32,
+ RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift,
+ RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32,
+ SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64,
+ SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64,
+ SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64,
+ SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64,
+ SHLLv16i8, SHLLv8i16, SHLLv4i32,
+ SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift,
+ SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64,
+ SMLALv8i16_indexed, SMLALv4i32_indexed,
+ SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64,
+ SMLSLv8i16_indexed, SMLSLv4i32_indexed,
+ SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64,
+ SMULLv8i16_indexed, SMULLv4i32_indexed,
+ SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64,
+ SQDMLALv8i16_indexed, SQDMLALv4i32_indexed,
+ SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64,
+ SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed,
+ SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64,
+ SQDMULLv8i16_indexed, SQDMULLv4i32_indexed,
+ SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift,
+ SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift,
+ SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift,
+ SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift,
+ SQXTNv16i8, SQXTNv8i16, SQXTNv4i32,
+ SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32,
+ SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift,
+ SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64,
+ SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64,
+ UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64,
+ UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64,
+ UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64,
+ UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64,
+ UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64,
+ UMLALv8i16_indexed, UMLALv4i32_indexed,
+ UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64,
+ UMLSLv8i16_indexed, UMLSLv4i32_indexed,
+ UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64,
+ UMULLv8i16_indexed, UMULLv4i32_indexed,
+ UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift,
+ UQXTNv16i8, UQXTNv8i16, UQXTNv4i32,
+ USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift,
+ USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64,
+ USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64,
+ XTNv16i8, XTNv8i16, XTNv4i32]>;
+
+// Target predicates.
+
+// Identify an instruction that effectively transfers a register to another.
+def IsCopyIdiomFn : TIIPredicate<"isCopyIdiom",
+ MCOpcodeSwitchStatement<
+ [// MOV {Rd, SP}, {SP, Rn} =>
+ // ADD {Rd, SP}, {SP, Rn}, #0
+ MCOpcodeSwitchCase<
+ [ADDWri, ADDXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<0>,
+ CheckIsRegOperand<1>,
+ CheckAny<
+ [CheckRegOperand<0, WSP>,
+ CheckRegOperand<0, SP>,
+ CheckRegOperand<1, WSP>,
+ CheckRegOperand<1, SP>]>,
+ CheckZeroOperand<2>]>>>,
+ // MOV Rd, Rm =>
+ // ORR Rd, ZR, Rm, LSL #0
+ MCOpcodeSwitchCase<
+ [ORRWrs, ORRXrs],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<1>,
+ CheckIsRegOperand<2>,
+ CheckAny<
+ [CheckRegOperand<1, WZR>,
+ CheckRegOperand<1, XZR>]>,
+ CheckShiftBy0]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsCopyIdiomPred : MCSchedPredicate<IsCopyIdiomFn>;
+
+// Identify arithmetic instructions with an extended register.
+def RegExtendedFn : TIIPredicate<"hasExtendedReg",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithExtOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckNot<CheckZeroOperand<3>>>>],
+ MCReturnStatement<FalsePred>>>;
+def RegExtendedPred : MCSchedPredicate<RegExtendedFn>;
+
+// Identify arithmetic and logic instructions with a shifted register.
+def RegShiftedFn : TIIPredicate<"hasShiftedReg",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckNot<CheckZeroOperand<3>>>>],
+ MCReturnStatement<FalsePred>>>;
+def RegShiftedPred : MCSchedPredicate<RegShiftedFn>;
+
+// Identify a load or store using the register offset addressing mode
+// with an extended or scaled register.
+def ScaledIdxFn : TIIPredicate<"isScaledAddr",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLoadStoreRegOffsetOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<[CheckNot<CheckMemExtLSL>,
+ CheckMemScaled]>>>],
+ MCReturnStatement<FalsePred>>>;
+def ScaledIdxPred : MCSchedPredicate<ScaledIdxFn>;
+
+// Identify an instruction that effectively resets a FP register to zero.
+def IsZeroFPIdiomFn : TIIPredicate<"isZeroFPIdiom",
+ MCOpcodeSwitchStatement<
+ [// MOVI Vd, #0
+ MCOpcodeSwitchCase<
+ [MOVIv8b_ns, MOVIv16b_ns,
+ MOVID, MOVIv2d_ns],
+ MCReturnStatement<CheckZeroOperand<1>>>,
+ // MOVI Vd, #0, LSL #0
+ MCOpcodeSwitchCase<
+ [MOVIv4i16, MOVIv8i16,
+ MOVIv2i32, MOVIv4i32],
+ MCReturnStatement<
+ CheckAll<
+ [CheckZeroOperand<1>,
+ CheckZeroOperand<2>]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsZeroFPIdiomPred : MCSchedPredicate<IsZeroFPIdiomFn>;
+
+// Identify an instruction that effectively resets a GP register to zero.
+def IsZeroIdiomFn : TIIPredicate<"isZeroIdiom",
+ MCOpcodeSwitchStatement<
+ [// ORR Rd, ZR, #0
+ MCOpcodeSwitchCase<
+ [ORRWri, ORRXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<1>,
+ CheckAny<
+ [CheckRegOperand<1, WZR>,
+ CheckRegOperand<1, XZR>]>,
+ CheckZeroOperand<2>]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsZeroIdiomPred : MCSchedPredicate<IsZeroIdiomFn>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td b/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
index ce81f48acf71..f55ba4d42fce 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
@@ -50,17 +50,6 @@ def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
-// Predicate for determining when a shiftable register is shifted.
-def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(*MI)}]>;
-
-// Predicate for determining when a extendedable register is extended.
-def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(*MI)}]>;
-
-// ScaledIdxPred is true if a WriteLDIdx operand will be
-// scaled. Subtargets can use this to dynamically select resources and
-// latency for WriteLDIdx and ReadAdrBase.
-def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(*MI)}]>;
-
// Serialized two-level address load.
// EXAMPLE: LOADGot
def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
new file mode 100644
index 000000000000..e9699b0367d3
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -0,0 +1,641 @@
+//===- AArch64SpeculationHardening.cpp - Harden Against Miss-Speculation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that inserts code to mitigate side channel
+// vulnerabilities that may arise under control flow miss-speculation.
+//
+// The pass implements tracking of control flow miss-speculation into a "taint"
+// register. That taint register can then be used to mask off registers with
+// sensitive data when executing under miss-speculation, a.k.a. "transient
+// execution".
+// This pass is aimed at mitigating SpectreV1-style vulnerabilities.
+//
+// It also implements speculative load hardening, i.e. using the taint register
+// to automatically mask off loaded data.
+//
+// As a possible follow-on improvement, also an intrinsics-based approach as
+// explained at https://lwn.net/Articles/759423/ could be implemented on top of
+// the current design.
+//
+// For AArch64, the following implementation choices are made to track control
+// flow miss-speculation into a taint register. Some of these differ from the
+// choices made in the similar pass in X86SpeculativeLoadHardening.cpp, as the
+// instruction set characteristics result in different trade-offs:
+// - The speculation hardening is done after register allocation. With a
+// relative abundance of registers, one register is reserved (X16) to be
+//   the taint register. X16 is very unlikely to clash with other register
+//   reservation mechanisms because:
+//   . The AArch64 ABI doesn't guarantee that X16 is retained across any call.
+//   . The only way for a programmer to request the use of X16 is through
+// inline assembly. In the rare case a function explicitly demands to
+// use X16/W16, this pass falls back to hardening against speculation
+// by inserting a DSB SYS/ISB barrier pair which will prevent control
+// flow speculation.
+// - It is easy to insert mask operations at this late stage as we have
+// mask operations available that don't set flags.
+// - The taint variable contains all-ones when no miss-speculation is detected,
+// and contains all-zeros when miss-speculation is detected. Therefore, when
+// masking, an AND instruction (which only changes the register to be masked,
+// no other side effects) can easily be inserted anywhere that's needed.
+// - The tracking of miss-speculation is done by using a data-flow conditional
+// select instruction (CSEL) to evaluate the flags that were also used to
+// make conditional branch direction decisions. Speculation of the CSEL
+// instruction can be limited with a CSDB instruction - so the combination of
+// CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
+// aren't speculated. When conditional branch direction gets miss-speculated,
+// the semantics of the inserted CSEL instruction is such that the taint
+// register will contain all zero bits.
+// One key requirement for this to work is that the conditional branch is
+// followed by an execution of the CSEL instruction, where the CSEL
+// instruction needs to use the same flags status as the conditional branch.
+// This means that the conditional branches must not be implemented as one
+// of the AArch64 conditional branches that do not use the flags as input
+// (CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
+// selectors to not produce these instructions when speculation hardening
+// is enabled. This pass will assert if it does encounter such an instruction.
+// - On function call boundaries, the miss-speculation state is transferred from
+// the taint register X16 to be encoded in the SP register as value 0.
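+//
+// As an illustrative sketch (not the literal output of this pass), the
+// tracking around a conditional branch on the EQ condition looks like:
+//     cmp   x1, x2
+//     b.eq  taken_bb
+// with "csel x16, x16, xzr, eq" inserted on the taken edge and
+// "csel x16, x16, xzr, ne" on the fall-through edge, so that X16 becomes
+// all-zeros on whichever edge is only reachable through a miss-speculated
+// branch direction.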
+//
+// For the aspect of automatically hardening loads, using the taint register,
+// (a.k.a. speculative load hardening, see
+// https://llvm.org/docs/SpeculativeLoadHardening.html), the following
+// implementation choices are made for AArch64:
+// - Many of the optimizations described at
+// https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer
+// loads haven't been implemented yet - but for some of them there are
+// FIXMEs in the code.
+// - Loads that load into general purpose (X or W) registers get hardened by
+// masking the loaded data. For loads that load into other registers, the
+// address loaded from gets hardened. It is expected that hardening the
+// loaded data may be more efficient; but masking data in registers other
+// than X or W is not easy and may result in being slower than just
+// hardening the X address register loaded from.
+// - On AArch64, CSDB instructions are inserted between the masking of the
+// register and its first use, to ensure there's no non-control-flow
+// speculation that might undermine the hardening mechanism.
+//
+// Future extensions/improvements could be:
+// - Implement this functionality using full speculation barriers, akin to the
+// x86-slh-lfence option. This may be more useful for the intrinsics-based
+// approach than for the SLH approach to masking.
+// Note that this pass already inserts the full speculation barriers if the
+// function for some niche reason makes use of X16/W16.
+// - No protection/instrumentation is added yet for indirect branch
+//   misprediction, but this could be done for some indirect branches, such as
+//   switch jump tables.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-speculation-hardening"
+
+#define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
+
+cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
+ cl::desc("Sanitize loads from memory."),
+ cl::init(true));
+
+namespace {
+
+class AArch64SpeculationHardening : public MachineFunctionPass {
+public:
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ static char ID;
+
+ AArch64SpeculationHardening() : MachineFunctionPass(ID) {
+ initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override {
+ return AARCH64_SPECULATION_HARDENING_NAME;
+ }
+
+private:
+ unsigned MisspeculatingTaintReg;
+ unsigned MisspeculatingTaintReg32Bit;
+ bool UseControlFlowSpeculationBarrier;
+ BitVector RegsNeedingCSDBBeforeUse;
+ BitVector RegsAlreadyMasked;
+
+ bool functionUsesHardeningRegister(MachineFunction &MF) const;
+ bool instrumentControlFlow(MachineBasicBlock &MBB);
+ bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ AArch64CC::CondCode &CondCode) const;
+ void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
+ AArch64CC::CondCode &CondCode, DebugLoc DL) const;
+ void insertSPToRegTaintPropagation(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI) const;
+ void insertRegToSPTaintPropagation(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned TmpReg) const;
+
+ bool slhLoads(MachineBasicBlock &MBB);
+ bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI, unsigned Reg);
+ bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB);
+ bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL);
+};
+
+} // end anonymous namespace
+
+char AArch64SpeculationHardening::ID = 0;
+
+INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
+ AARCH64_SPECULATION_HARDENING_NAME, false, false)
+
+bool AArch64SpeculationHardening::endsWithCondControlFlow(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ AArch64CC::CondCode &CondCode) const {
+ SmallVector<MachineOperand, 1> analyzeBranchCondCode;
+ if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
+ return false;
+
+ // Ignore if the BB ends in an unconditional branch/fall-through.
+ if (analyzeBranchCondCode.empty())
+ return false;
+
+ // If the BB ends with a single conditional branch, FBB will be set to
+ // nullptr (see API docs for TII->analyzeBranch). For the rest of the
+ // analysis we want the FBB block to be set always.
+ assert(TBB != nullptr);
+ if (FBB == nullptr)
+ FBB = MBB.getFallThrough();
+
+  // If both the true and the false conditions jump to the same basic block,
+  // there is no need for any protection - whether the branch is speculated
+  // correctly or not, we end up executing the architecturally correct code.
+ if (TBB == FBB)
+ return false;
+
+ assert(MBB.succ_size() == 2);
+ // translate analyzeBranchCondCode to CondCode.
+ assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
+ CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
+ return true;
+}
+
+void AArch64SpeculationHardening::insertTrackingCode(
+ MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
+ DebugLoc DL) const {
+ if (UseControlFlowSpeculationBarrier) {
+ // insert full control flow speculation barrier (DSB SYS + ISB)
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB))
+ .addImm(0xf);
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB))
+ .addImm(0xf);
+ } else {
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
+ .addDef(MisspeculatingTaintReg)
+ .addUse(MisspeculatingTaintReg)
+ .addUse(AArch64::XZR)
+ .addImm(CondCode);
+ SplitEdgeBB.addLiveIn(AArch64::NZCV);
+ }
+}
+
+bool AArch64SpeculationHardening::instrumentControlFlow(
+ MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
+
+ bool Modified = false;
+ MachineBasicBlock *TBB = nullptr;
+ MachineBasicBlock *FBB = nullptr;
+ AArch64CC::CondCode CondCode;
+
+ if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
+ LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
+ } else {
+ // Now insert:
+ // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
+ // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
+ // edge.
+ AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);
+
+ MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
+ MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);
+
+ assert(SplitEdgeTBB != nullptr);
+ assert(SplitEdgeFBB != nullptr);
+
+ DebugLoc DL;
+ if (MBB.instr_end() != MBB.instr_begin())
+ DL = (--MBB.instr_end())->getDebugLoc();
+
+ insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
+ insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);
+
+ LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
+ LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
+ Modified = true;
+ }
+
+ // Perform correct code generation around function calls and before returns.
+ {
+ SmallVector<MachineInstr *, 4> ReturnInstructions;
+ SmallVector<MachineInstr *, 4> CallInstructions;
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.isReturn())
+ ReturnInstructions.push_back(&MI);
+ else if (MI.isCall())
+ CallInstructions.push_back(&MI);
+ }
+
+ Modified |=
+ (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0);
+
+ for (MachineInstr *Return : ReturnInstructions)
+ insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17);
+ for (MachineInstr *Call : CallInstructions) {
+ // Just after the call:
+ MachineBasicBlock::iterator i = Call;
+ i++;
+ insertSPToRegTaintPropagation(Call->getParent(), i);
+ // Just before the call:
+ insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17);
+ }
+ }
+
+ return Modified;
+}
+
+void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const {
+ // If full control flow speculation barriers are used, emit a control flow
+ // barrier to block potential miss-speculation in flight coming in to this
+ // function.
+ if (UseControlFlowSpeculationBarrier) {
+ // insert full control flow speculation barrier (DSB SYS + ISB)
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf);
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf);
+ return;
+ }
+
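+  // The two instructions below set the taint register to all-ones when SP is
+  // non-zero (no miss-speculation encoded by the caller or callee) and to
+  // all-zeros when SP is zero (miss-speculation was encoded).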
+ // CMP SP, #0 === SUBS xzr, SP, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
+ .addDef(AArch64::XZR)
+ .addUse(AArch64::SP)
+ .addImm(0)
+ .addImm(0); // no shift
+ // CSETM x16, NE === CSINV x16, xzr, xzr, EQ
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
+ .addDef(MisspeculatingTaintReg)
+ .addUse(AArch64::XZR)
+ .addUse(AArch64::XZR)
+ .addImm(AArch64CC::EQ);
+}
+
+void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+ unsigned TmpReg) const {
+ // If full control flow speculation barriers are used, there will not be
+ // miss-speculation when returning from this function, and therefore, also
+ // no need to encode potential miss-speculation into the stack pointer.
+ if (UseControlFlowSpeculationBarrier)
+ return;
+
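+  // The sequence below leaves SP unchanged when the taint register is
+  // all-ones and sets SP to zero when it is all-zeros, encoding the
+  // miss-speculation state into SP across the call or return boundary.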
+ // mov Xtmp, SP === ADD Xtmp, SP, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+ .addDef(TmpReg)
+ .addUse(AArch64::SP)
+ .addImm(0)
+ .addImm(0); // no shift
+  // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, LSL #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
+ .addDef(TmpReg, RegState::Renamable)
+ .addUse(TmpReg, RegState::Kill | RegState::Renamable)
+ .addUse(MisspeculatingTaintReg, RegState::Kill)
+ .addImm(0);
+ // mov SP, Xtmp === ADD SP, Xtmp, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+ .addDef(AArch64::SP)
+ .addUse(TmpReg, RegState::Kill)
+ .addImm(0)
+ .addImm(0); // no shift
+}
+
+bool AArch64SpeculationHardening::functionUsesHardeningRegister(
+ MachineFunction &MF) const {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ // treat function calls specially, as the hardening register does not
+ // need to remain live across function calls.
+ if (MI.isCall())
+ continue;
+ if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
+ MI.modifiesRegister(MisspeculatingTaintReg, TRI))
+ return true;
+ }
+ }
+ return false;
+}
+
+// Make GPR register Reg speculation-safe by putting it through the
+// SpeculationSafeValue pseudo instruction, if we can't prove that
+// the value in the register has already been hardened.
+bool AArch64SpeculationHardening::makeGPRSpeculationSafe(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI,
+ unsigned Reg) {
+ assert(AArch64::GPR32allRegClass.contains(Reg) ||
+ AArch64::GPR64allRegClass.contains(Reg));
+
+ // Loads cannot directly load a value into the SP (nor WSP).
+ // Therefore, if Reg is SP or WSP, it is because the instruction loads from
+ // the stack through the stack pointer.
+ //
+ // Since the stack pointer is never dynamically controllable, don't harden it.
+ if (Reg == AArch64::SP || Reg == AArch64::WSP)
+ return false;
+
+ // Do not harden the register again if already hardened before.
+ if (RegsAlreadyMasked[Reg])
+ return false;
+
+ const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg);
+ LLVM_DEBUG(dbgs() << "About to harden register : " << Reg << "\n");
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Is64Bit ? AArch64::SpeculationSafeValueX
+ : AArch64::SpeculationSafeValueW))
+ .addDef(Reg)
+ .addUse(Reg);
+ RegsAlreadyMasked.set(Reg);
+ return true;
+}
+
+bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB);
+
+ RegsAlreadyMasked.reset();
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMBBI;
+ for (; MBBI != E; MBBI = NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ NextMBBI = std::next(MBBI);
+ // Only harden loaded values or addresses used in loads.
+ if (!MI.mayLoad())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "About to harden: " << MI);
+
+ // For general purpose register loads, harden the registers loaded into.
+ // For other loads, harden the address loaded from.
+ // Masking the loaded value is expected to result in less performance
+ // overhead, as the load can still execute speculatively in comparison to
+ // when the address loaded from gets masked. However, masking is only
+ // easy to do efficiently on GPR registers, so for loads into non-GPR
+ // registers (e.g. floating point loads), mask the address loaded from.
+ bool AllDefsAreGPR = llvm::all_of(MI.defs(), [&](MachineOperand &Op) {
+ return Op.isReg() && (AArch64::GPR32allRegClass.contains(Op.getReg()) ||
+ AArch64::GPR64allRegClass.contains(Op.getReg()));
+ });
+    // FIXME: it might be a worthwhile optimization to not mask loaded
+    // values if all the registers involved in address calculation are already
+    // hardened, leaving this load unable to execute on a miss-speculated
+    // path.
+ bool HardenLoadedData = AllDefsAreGPR;
+ bool HardenAddressLoadedFrom = !HardenLoadedData;
+
+    // First remove registers from RegsAlreadyMasked if their value is
+    // updated by this instruction - it makes them contain a new value that is
+    // not guaranteed to already have been masked.
+ for (MachineOperand Op : MI.defs())
+ for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
+ RegsAlreadyMasked.reset(*AI);
+
+ // FIXME: loads from the stack with an immediate offset from the stack
+ // pointer probably shouldn't be hardened, which could result in a
+ // significant optimization. See section "Don’t check loads from
+ // compile-time constant stack offsets", in
+ // https://llvm.org/docs/SpeculativeLoadHardening.html
+
+ if (HardenLoadedData)
+ for (auto Def : MI.defs()) {
+ if (Def.isDead())
+ // Do not mask a register that is not used further.
+ continue;
+ // FIXME: For pre/post-increment addressing modes, the base register
+ // used in address calculation is also defined by this instruction.
+ // It might be a worthwhile optimization to not harden that
+ // base register increment/decrement when the increment/decrement is
+ // an immediate.
+ Modified |= makeGPRSpeculationSafe(MBB, NextMBBI, MI, Def.getReg());
+ }
+
+ if (HardenAddressLoadedFrom)
+ for (auto Use : MI.uses()) {
+ if (!Use.isReg())
+ continue;
+ unsigned Reg = Use.getReg();
+ // Some loads of floating point data have implicit defs/uses on a
+ // super register of that floating point data. Some examples:
+ // $s0 = LDRSui $sp, 22, implicit-def $q0
+ // $q0 = LD1i64 $q0, 1, renamable $x0
+ // We need to filter out these uses for non-GPR register which occur
+ // because the load partially fills a non-GPR register with the loaded
+ // data. Just skipping all non-GPR registers is safe (for now) as all
+ // AArch64 load instructions only use GPR registers to perform the
+ // address calculation. FIXME: However that might change once we can
+ // produce SVE gather instructions.
+ if (!(AArch64::GPR32allRegClass.contains(Reg) ||
+ AArch64::GPR64allRegClass.contains(Reg)))
+ continue;
+ Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg);
+ }
+ }
+ return Modified;
+}
+
+/// \brief If MBBI references a pseudo instruction that should be expanded
+/// here, do the expansion and return true. Otherwise return false.
+bool AArch64SpeculationHardening::expandSpeculationSafeValue(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ bool Is64Bit = true;
+
+ switch (Opcode) {
+ default:
+ break;
+ case AArch64::SpeculationSafeValueW:
+ Is64Bit = false;
+ LLVM_FALLTHROUGH;
+ case AArch64::SpeculationSafeValueX:
+    // Just remove the SpeculationSafe pseudos if control flow
+    // miss-speculation isn't happening because we're already inserting barriers
+    // to guarantee that.
+ if (!UseControlFlowSpeculationBarrier) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ // Mark this register and all its aliasing registers as needing to be
+ // value speculation hardened before its next use, by using a CSDB
+ // barrier instruction.
+ for (MachineOperand Op : MI.defs())
+ for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
+ RegsNeedingCSDBBeforeUse.set(*AI);
+
+ // Mask off with taint state.
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ Is64Bit ? TII->get(AArch64::ANDXrs) : TII->get(AArch64::ANDWrs))
+ .addDef(DstReg)
+ .addUse(SrcReg, RegState::Kill)
+ .addUse(Is64Bit ? MisspeculatingTaintReg
+ : MisspeculatingTaintReg32Bit)
+ .addImm(0);
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) {
+ assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when "
+ "control flow miss-speculation "
+ "is already blocked");
+ // insert data value speculation barrier (CSDB)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT)).addImm(0x14);
+ RegsNeedingCSDBBeforeUse.reset();
+ return true;
+}
+
+bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
+ MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ RegsNeedingCSDBBeforeUse.reset();
+
+ // The following loop iterates over all instructions in the basic block,
+ // and performs 2 operations:
+ // 1. Insert a CSDB at this location if needed.
+ // 2. Expand the SpeculationSafeValuePseudo if the current instruction is
+ // one.
+ //
+ // The insertion of the CSDB is done as late as possible (i.e. just before
+  // the use of a masked register), in the hope that this will reduce the
+ // total number of CSDBs in a block when there are multiple masked registers
+ // in the block.
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ DebugLoc DL;
+ while (MBBI != E) {
+ MachineInstr &MI = *MBBI;
+ DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+
+ // First check if a CSDB needs to be inserted due to earlier registers
+ // that were masked and that are used by the next instruction.
+ // Also emit the barrier on any potential control flow changes.
+ bool NeedToEmitBarrier = false;
+ if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator()))
+ NeedToEmitBarrier = true;
+ if (!NeedToEmitBarrier)
+ for (MachineOperand Op : MI.uses())
+ if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) {
+ NeedToEmitBarrier = true;
+ break;
+ }
+
+ if (NeedToEmitBarrier)
+ Modified |= insertCSDB(MBB, MBBI, DL);
+
+ Modified |= expandSpeculationSafeValue(MBB, MBBI);
+
+ MBBI = NMBBI;
+ }
+
+ if (RegsNeedingCSDBBeforeUse.any())
+ Modified |= insertCSDB(MBB, MBBI, DL);
+
+ return Modified;
+}
+
+bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return false;
+
+ MisspeculatingTaintReg = AArch64::X16;
+ MisspeculatingTaintReg32Bit = AArch64::W16;
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ RegsNeedingCSDBBeforeUse.resize(TRI->getNumRegs());
+ RegsAlreadyMasked.resize(TRI->getNumRegs());
+ UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);
+
+ bool Modified = false;
+
+ // Step 1: Enable automatic insertion of SpeculationSafeValue.
+ if (HardenLoads) {
+ LLVM_DEBUG(
+ dbgs() << "***** AArch64SpeculationHardening - automatic insertion of "
+ "SpeculationSafeValue intrinsics *****\n");
+ for (auto &MBB : MF)
+ Modified |= slhLoads(MBB);
+ }
+
+  // Step 2.a: Add instrumentation code to function entry and exits.
+ LLVM_DEBUG(
+ dbgs()
+ << "***** AArch64SpeculationHardening - track control flow *****\n");
+
+ SmallVector<MachineBasicBlock *, 2> EntryBlocks;
+ EntryBlocks.push_back(&MF.front());
+ for (const LandingPadInfo &LPI : MF.getLandingPads())
+ EntryBlocks.push_back(LPI.LandingPadBlock);
+ for (auto Entry : EntryBlocks)
+ insertSPToRegTaintPropagation(
+ Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
+
+  // Step 2.b: Add instrumentation code to every basic block.
+ for (auto &MBB : MF)
+ Modified |= instrumentControlFlow(MBB);
+
+ LLVM_DEBUG(dbgs() << "***** AArch64SpeculationHardening - Lowering "
+ "SpeculationSafeValue Pseudos *****\n");
+ // Step 3: Lower SpeculationSafeValue pseudo instructions.
+ for (auto &MBB : MF)
+ Modified |= lowerSpeculationSafeValuePseudos(MBB);
+
+ return Modified;
+}
+
+/// \brief Returns an instance of the AArch64 speculation hardening pass.
+FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
+ return new AArch64SpeculationHardening();
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index fc7b5984fe3e..d5643d384283 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -148,9 +148,11 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
for (auto &MI : MBB) {
if (!isNarrowFPStore(MI))
continue;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) {
+ if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) &&
+ BaseOp->isReg()) {
+ unsigned BaseReg = BaseOp->getReg();
if (PrevBaseReg == BaseReg) {
// If this block can take STPs, skip ahead to the next block.
if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent()))
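
The hunk above switches from an out-parameter base register to an out-parameter operand pointer, so callers now have to confirm that the returned operand really is a register before reading it. A minimal standalone illustration of that guarded access pattern follows; the types and values are invented, only the shape of the check mirrors the patch.

#include <cstdio>

struct OperandModel {
  bool IsReg = false;
  unsigned Reg = 0;
  bool isReg() const { return IsReg; }
  unsigned getReg() const { return Reg; }
};

// Hypothetical stand-in for the query: hands back an operand that may or may
// not be a register, plus the immediate offset of the memory access.
bool getMemOperandWithOffsetModel(OperandModel *&BaseOp, long long &Offset) {
  static OperandModel FrameBase{true, 29}; // pretend the base is x29
  BaseOp = &FrameBase;
  Offset = 16;
  return true;
}

int main() {
  OperandModel *BaseOp = nullptr;
  long long Offset = 0;
  if (getMemOperandWithOffsetModel(BaseOp, Offset) && BaseOp->isReg())
    std::printf("base x%u, offset %lld\n", BaseOp->getReg(), Offset);
  return 0;
}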
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 04bb90d30d6d..dd30d25b2b50 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -14,13 +14,13 @@
#include "AArch64Subtarget.h"
#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64PBQPRegAlloc.h"
-#include "AArch64TargetMachine.h"
-
#include "AArch64CallLowering.h"
+#include "AArch64InstrInfo.h"
#include "AArch64LegalizerInfo.h"
+#include "AArch64PBQPRegAlloc.h"
#include "AArch64RegisterBankInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
@@ -67,16 +67,30 @@ void AArch64Subtarget::initializeProperties() {
// this in the future so we can specify it together with the subtarget
// features.
switch (ARMProcFamily) {
+ case Others:
+ break;
+ case CortexA35:
+ break;
+ case CortexA53:
+ PrefFunctionAlignment = 3;
+ break;
+ case CortexA55:
+ break;
+ case CortexA57:
+ MaxInterleaveFactor = 4;
+ PrefFunctionAlignment = 4;
+ break;
+ case CortexA72:
+ case CortexA73:
+ case CortexA75:
+ PrefFunctionAlignment = 4;
+ break;
case Cyclone:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
MaxPrefetchIterationsAhead = 3;
break;
- case CortexA57:
- MaxInterleaveFactor = 4;
- PrefFunctionAlignment = 4;
- break;
case ExynosM1:
MaxInterleaveFactor = 4;
MaxJumpTableSize = 8;
@@ -98,11 +112,6 @@ void AArch64Subtarget::initializeProperties() {
MinPrefetchStride = 2048;
MaxPrefetchIterationsAhead = 8;
break;
- case Saphira:
- MaxInterleaveFactor = 4;
- // FIXME: remove this to enable 64-bit SLP if performance looks good.
- MinVectorRegisterBitWidth = 128;
- break;
case Kryo:
MaxInterleaveFactor = 4;
VectorInsertExtractBaseCost = 2;
@@ -113,6 +122,11 @@ void AArch64Subtarget::initializeProperties() {
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
+ case Saphira:
+ MaxInterleaveFactor = 4;
+ // FIXME: remove this to enable 64-bit SLP if performance looks good.
+ MinVectorRegisterBitWidth = 128;
+ break;
case ThunderX2T99:
CacheLineSize = 64;
PrefFunctionAlignment = 3;
@@ -134,17 +148,11 @@ void AArch64Subtarget::initializeProperties() {
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
- case CortexA35: break;
- case CortexA53:
- PrefFunctionAlignment = 3;
- break;
- case CortexA55: break;
- case CortexA72:
- case CortexA73:
- case CortexA75:
+ case TSV110:
+ CacheLineSize = 64;
PrefFunctionAlignment = 4;
+ PrefLoopAlignment = 2;
break;
- case Others: break;
}
}
@@ -152,10 +160,15 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS),
- ReserveX18(AArch64::isX18ReservedByDefault(TT)), IsLittle(LittleEndian),
+ ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
+ CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
+ IsLittle(LittleEndian),
TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
TLInfo(TM, *this) {
+ if (AArch64::isX18ReservedByDefault(TT))
+ ReserveXRegister.set(18);
+
CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
Legalizer.reset(new AArch64LegalizerInfo(*this));
@@ -196,18 +209,22 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
return AArch64II::MO_GOT;
- unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
- : AArch64II::MO_NO_FLAG;
-
- if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
- return AArch64II::MO_GOT | Flags;
+ if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
+ if (GV->hasDLLImportStorageClass())
+ return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
+ if (getTargetTriple().isOSWindows())
+ return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
+ return AArch64II::MO_GOT;
+ }
// The small code model's direct accesses use ADRP, which cannot
// necessarily produce the value 0 (if the code is above 4GB).
- if (useSmallAddressing() && GV->hasExternalWeakLinkage())
- return AArch64II::MO_GOT | Flags;
+ // Same for the tiny code model, where we have a pc relative LDR.
+ if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
+ GV->hasExternalWeakLinkage())
+ return AArch64II::MO_GOT;
- return Flags;
+ return AArch64II::MO_NO_FLAG;
}
unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
@@ -265,7 +282,7 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
// We usually compute max call frame size after ISel. Do the computation now
// if the .mir file didn't specify it. Note that this will probably give you
// bogus values after PEI has eliminated the callframe setup/destroy pseudo
- // instructions, specify explicitely if you need it to be correct.
+ // instructions, specify explicitly if you need it to be correct.
MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.isMaxCallFrameSizeComputed())
MFI.computeMaxCallFrameSize(MF);
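
As a reading aid for the ClassifyGlobalReference hunk above, here is a compact standalone restatement of the new decision order (the enum values are illustrative, not the real MO_* flag bits): non-DSO-local globals always go through the GOT, with a DLL-import variant when the storage class asks for it and a COFF-stub variant otherwise on Windows, while external-weak symbols also take the GOT under the small or tiny code model because ADRP and the tiny model's pc-relative LDR cannot materialize address zero.

#include <cstdio>

enum Flags { NoFlag = 0, GOT = 1, DLLImport = 2, COFFStub = 4 };

unsigned classify(bool DSOLocal, bool DLLImportStorage, bool IsWindows,
                  bool SmallOrTinyCM, bool ExternalWeak) {
  if (!DSOLocal) {
    if (DLLImportStorage)
      return GOT | DLLImport;
    if (IsWindows)
      return GOT | COFFStub;
    return GOT;
  }
  // ADRP (small code model) and the tiny model's pc-relative LDR cannot
  // produce the value 0, so external-weak symbols still go via the GOT.
  if (SmallOrTinyCM && ExternalWeak)
    return GOT;
  return NoFlag;
}

int main() {
  // A non-local symbol on Windows without dllimport gets a COFF stub.
  std::printf("flags = %u\n", classify(false, false, true, true, false));
  return 0;
}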
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5af4c0dd9c19..82f7bb755951 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -56,7 +56,8 @@ public:
ThunderX,
ThunderXT81,
ThunderXT83,
- ThunderXT88
+ ThunderXT88,
+ TSV110
};
protected:
@@ -67,6 +68,7 @@ protected:
bool HasV8_2aOps = false;
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
+ bool HasV8_5aOps = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
@@ -78,8 +80,36 @@ protected:
bool HasRDM = false;
bool HasPerfMon = false;
bool HasFullFP16 = false;
+ bool HasFP16FML = false;
bool HasSPE = false;
+ // ARMv8.1 extensions
+ bool HasVH = false;
+ bool HasPAN = false;
+ bool HasLOR = false;
+
+ // ARMv8.2 extensions
+ bool HasPsUAO = false;
+ bool HasPAN_RWV = false;
+ bool HasCCPP = false;
+
+ // ARMv8.3 extensions
+ bool HasPA = false;
+ bool HasJS = false;
+ bool HasCCIDX = false;
+ bool HasComplxNum = false;
+
+ // ARMv8.4 extensions
+ bool HasNV = false;
+ bool HasRASv8_4 = false;
+ bool HasMPAM = false;
+ bool HasDIT = false;
+ bool HasTRACEV8_4 = false;
+ bool HasAM = false;
+ bool HasSEL2 = false;
+ bool HasTLB_RMI = false;
+ bool HasFMI = false;
+ bool HasRCPC_IMMO = false;
// ARMv8.4 Crypto extensions
bool HasSM4 = true;
bool HasSHA3 = true;
@@ -92,11 +122,25 @@ protected:
bool HasRCPC = false;
bool HasAggressiveFMA = false;
+ // Armv8.5-A Extensions
+ bool HasAlternativeNZCV = false;
+ bool HasFRInt3264 = false;
+ bool HasSpecRestrict = false;
+ bool HasSSBS = false;
+ bool HasSB = false;
+ bool HasPredRes = false;
+ bool HasCCDP = false;
+ bool HasBTI = false;
+ bool HasRandGen = false;
+ bool HasMTE = false;
+
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing = false;
+ bool HasZeroCycleZeroingGP = false;
+ bool HasZeroCycleZeroingFP = false;
bool HasZeroCycleZeroingFPWorkaround = false;
// StrictAlign - Disallow unaligned memory accesses.
@@ -122,10 +166,13 @@ protected:
bool HasArithmeticCbzFusion = false;
bool HasFuseAddress = false;
bool HasFuseAES = false;
+ bool HasFuseArithmeticLogic = false;
bool HasFuseCCSelect = false;
+ bool HasFuseCryptoEOR = false;
bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
+ bool Force32BitJumpTables = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
@@ -137,11 +184,11 @@ protected:
unsigned MaxJumpTableSize = 0;
unsigned WideningBaseCost = 0;
- // ReserveX18 - X18 is not available as a general purpose register.
- bool ReserveX18;
+ // ReserveXRegister[i] - X#i is not available as a general purpose register.
+ BitVector ReserveXRegister;
- // ReserveX20 - X20 is not available as a general purpose register.
- bool ReserveX20 = false;
+ // CustomCallUsedXRegister[i] - X#i call saved.
+ BitVector CustomCallSavedXRegs;
bool IsLittle;
@@ -211,10 +258,13 @@ public:
bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
+ bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
- bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; }
+
+ bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; }
bool hasZeroCycleZeroingFPWorkaround() const {
return HasZeroCycleZeroingFPWorkaround;
@@ -228,8 +278,12 @@ public:
return MinVectorRegisterBitWidth;
}
- bool isX18Reserved() const { return ReserveX18; }
- bool isX20Reserved() const { return ReserveX20; }
+ bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
+ unsigned getNumXRegisterReserved() const { return ReserveXRegister.count(); }
+ bool isXRegCustomCalleeSaved(size_t i) const {
+ return CustomCallSavedXRegs[i];
+ }
+ bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
@@ -258,16 +312,20 @@ public:
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool hasFuseAddress() const { return HasFuseAddress; }
bool hasFuseAES() const { return HasFuseAES; }
+ bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
bool hasFuseCCSelect() const { return HasFuseCCSelect; }
+ bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
bool hasFusion() const {
return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
- hasFuseAES() || hasFuseCCSelect() || hasFuseLiterals();
+ hasFuseAES() || hasFuseArithmeticLogic() ||
+ hasFuseCCSelect() || hasFuseLiterals();
}
bool useRSqrt() const { return UseRSqrt; }
+ bool force32BitJumpTables() const { return Force32BitJumpTables; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
@@ -291,11 +349,22 @@ public:
bool hasPerfMon() const { return HasPerfMon; }
bool hasFullFP16() const { return HasFullFP16; }
+ bool hasFP16FML() const { return HasFP16FML; }
bool hasSPE() const { return HasSPE; }
bool hasLSLFast() const { return HasLSLFast; }
bool hasSVE() const { return HasSVE; }
bool hasRCPC() const { return HasRCPC; }
bool hasAggressiveFMA() const { return HasAggressiveFMA; }
+ bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
+ bool hasFRInt3264() const { return HasFRInt3264; }
+ bool hasSpecRestrict() const { return HasSpecRestrict; }
+ bool hasSSBS() const { return HasSSBS; }
+ bool hasSB() const { return HasSB; }
+ bool hasPredRes() const { return HasPredRes; }
+ bool hasCCDP() const { return HasCCDP; }
+ bool hasBTI() const { return HasBTI; }
+ bool hasRandGen() const { return HasRandGen; }
+ bool hasMTE() const { return HasMTE; }
bool isLittleEndian() const { return IsLittle; }
@@ -312,6 +381,30 @@ public:
bool useAA() const override { return UseAA; }
+ bool hasVH() const { return HasVH; }
+ bool hasPAN() const { return HasPAN; }
+ bool hasLOR() const { return HasLOR; }
+
+ bool hasPsUAO() const { return HasPsUAO; }
+ bool hasPAN_RWV() const { return HasPAN_RWV; }
+ bool hasCCPP() const { return HasCCPP; }
+
+ bool hasPA() const { return HasPA; }
+ bool hasJS() const { return HasJS; }
+ bool hasCCIDX() const { return HasCCIDX; }
+ bool hasComplxNum() const { return HasComplxNum; }
+
+ bool hasNV() const { return HasNV; }
+ bool hasRASv8_4() const { return HasRASv8_4; }
+ bool hasMPAM() const { return HasMPAM; }
+ bool hasDIT() const { return HasDIT; }
+ bool hasTRACEV8_4() const { return HasTRACEV8_4; }
+ bool hasAM() const { return HasAM; }
+ bool hasSEL2() const { return HasSEL2; }
+ bool hasTLB_RMI() const { return HasTLB_RMI; }
+ bool hasFMI() const { return HasFMI; }
+ bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
+
bool useSmallAddressing() const {
switch (TLInfo.getTargetMachine().getCodeModel()) {
case CodeModel::Kernel:
@@ -346,6 +439,8 @@ public:
bool isCallingConvWin64(CallingConv::ID CC) const {
switch (CC) {
case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::Swift:
return isTargetWindows();
case CallingConv::Win64:
return true;
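
Because the header above replaces the dedicated ReserveX18/ReserveX20 booleans with per-register bit vectors, any X register can now be marked reserved or custom callee-saved individually. A tiny standalone model of the new query surface, with std::bitset standing in for llvm::BitVector and the x18 example chosen purely for illustration:

#include <bitset>
#include <cstddef>
#include <cstdio>

struct SubtargetModel {
  std::bitset<31> ReserveXRegister;     // indexed by X register number
  std::bitset<31> CustomCallSavedXRegs; // registers treated as callee-saved

  bool isXRegisterReserved(std::size_t i) const { return ReserveXRegister[i]; }
  std::size_t getNumXRegisterReserved() const { return ReserveXRegister.count(); }
  bool isXRegCustomCalleeSaved(std::size_t i) const {
    return CustomCallSavedXRegs[i];
  }
  bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
};

int main() {
  SubtargetModel ST;
  ST.ReserveXRegister.set(18); // e.g. targets where x18 is reserved by default
  std::printf("x18 reserved: %d, reserved count: %zu\n",
              static_cast<int>(ST.isXRegisterReserved(18)),
              ST.getNumXRegisterReserved());
  return 0;
}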
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index dbc4deaf3f9f..a804fb11175b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -15,6 +15,25 @@
include "llvm/TableGen/SearchableTable.td"
//===----------------------------------------------------------------------===//
+// Features that, for the compiler, only enable system operands and PStates
+//===----------------------------------------------------------------------===//
+
+def HasCCPP : Predicate<"Subtarget->hasCCPP()">,
+ AssemblerPredicate<"FeatureCCPP", "ccpp">;
+
+def HasPAN : Predicate<"Subtarget->hasPAN()">,
+ AssemblerPredicate<"FeaturePAN",
+ "ARM v8.1 Privileged Access-Never extension">;
+
+def HasPsUAO : Predicate<"Subtarget->hasPsUAO()">,
+ AssemblerPredicate<"FeaturePsUAO",
+ "ARM v8.2 UAO PState extension (psuao)">;
+
+def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
+ AssemblerPredicate<"FeaturePAN_RWV",
+ "ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
+
+//===----------------------------------------------------------------------===//
// AT (address translate) instruction options.
//===----------------------------------------------------------------------===//
@@ -45,7 +64,7 @@ def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
-let Requires = [{ {AArch64::HasV8_2aOps} }] in {
+let Requires = [{ {AArch64::FeaturePAN_RWV} }] in {
def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
}
@@ -102,9 +121,33 @@ def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>;
def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>;
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeatureCCPP} }] in
def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>;
+let Requires = [{ {AArch64::FeatureCacheDeepPersist} }] in
+def : DC<"CVADP", 0b011, 0b0111, 0b1101, 0b001>;
+
+let Requires = [{ {AArch64::FeatureMTE} }] in {
+def : DC<"IGVAC", 0b000, 0b0111, 0b0110, 0b011>;
+def : DC<"IGSW", 0b000, 0b0111, 0b0110, 0b100>;
+def : DC<"CGSW", 0b000, 0b0111, 0b1010, 0b100>;
+def : DC<"CIGSW", 0b000, 0b0111, 0b1110, 0b100>;
+def : DC<"CGVAC", 0b011, 0b0111, 0b1010, 0b011>;
+def : DC<"CGVAP", 0b011, 0b0111, 0b1100, 0b011>;
+def : DC<"CGVADP", 0b011, 0b0111, 0b1101, 0b011>;
+def : DC<"CIGVAC", 0b011, 0b0111, 0b1110, 0b011>;
+def : DC<"GVA", 0b011, 0b0111, 0b0100, 0b011>;
+def : DC<"IGDVAC", 0b000, 0b0111, 0b0110, 0b101>;
+def : DC<"IGDSW", 0b000, 0b0111, 0b0110, 0b110>;
+def : DC<"CGDSW", 0b000, 0b0111, 0b1010, 0b110>;
+def : DC<"CIGDSW", 0b000, 0b0111, 0b1110, 0b110>;
+def : DC<"CGDVAC", 0b011, 0b0111, 0b1010, 0b101>;
+def : DC<"CGDVAP", 0b011, 0b0111, 0b1100, 0b101>;
+def : DC<"CGDVADP", 0b011, 0b0111, 0b1101, 0b101>;
+def : DC<"CIGDVAC", 0b011, 0b0111, 0b1110, 0b101>;
+def : DC<"GZVA", 0b011, 0b0111, 0b0100, 0b100>;
+}
+
//===----------------------------------------------------------------------===//
// IC (instruction cache maintenance) instruction options.
//===----------------------------------------------------------------------===//
@@ -154,7 +197,7 @@ class TSB<string name, bits<4> encoding> : SearchableTable{
bits<4> Encoding;
let Encoding = encoding;
- code Requires = [{ {AArch64::HasV8_4aOps} }];
+ code Requires = [{ {AArch64::FeatureTRACEV8_4} }];
}
def : TSB<"csync", 0>;
@@ -290,14 +333,21 @@ def : PState<"SPSel", 0b00101>;
def : PState<"DAIFSet", 0b11110>;
def : PState<"DAIFClr", 0b11111>;
// v8.1a "Privileged Access Never" extension-specific PStates
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeaturePAN} }] in
def : PState<"PAN", 0b00100>;
+
// v8.2a "User Access Override" extension-specific PStates
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeaturePsUAO} }] in
def : PState<"UAO", 0b00011>;
// v8.4a timining insensitivity of data processing instructions
-let Requires = [{ {AArch64::HasV8_4aOps} }] in
+let Requires = [{ {AArch64::FeatureDIT} }] in
def : PState<"DIT", 0b11010>;
+// v8.5a Spectre Mitigation
+let Requires = [{ {AArch64::FeatureSSBS} }] in
+def : PState<"SSBS", 0b11001>;
+// v8.5a Memory Tagging Extension
+let Requires = [{ {AArch64::FeatureMTE} }] in
+def : PState<"TCO", 0b11100>;
//===----------------------------------------------------------------------===//
// PSB instruction options.
@@ -315,6 +365,23 @@ class PSB<string name, bits<5> encoding> : SearchableTable {
def : PSB<"csync", 0x11>;
//===----------------------------------------------------------------------===//
+// BTI instruction options.
+//===----------------------------------------------------------------------===//
+
+class BTI<string name, bits<2> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<2> Encoding;
+ let Encoding = encoding;
+}
+
+def : BTI<"c", 0b01>;
+def : BTI<"j", 0b10>;
+def : BTI<"jc", 0b11>;
+
+//===----------------------------------------------------------------------===//
// TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===//
@@ -366,8 +433,9 @@ def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
+// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
+let Requires = [{ {AArch64::FeatureTLB_RMI} }] in {
// Armv8.4-A Outer Sharable TLB Maintenance instructions:
-let Requires = [{ {AArch64::HasV8_4aOps} }] in {
// op1 CRn CRm op2
def : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
def : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
@@ -418,6 +486,23 @@ def : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
def : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
def : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
def : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
+} //FeatureTLB_RMI
+
+// Armv8.5-A Prediction Restriction by Context instruction options:
+class PRCTX<string name, bits<4> crm> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<11> Encoding;
+ let Encoding{10-4} = 0b0110111;
+ let Encoding{3-0} = crm;
+ bit NeedsReg = 1;
+ code Requires = [{ {} }];
+}
+
+let Requires = [{ {AArch64::FeaturePredRes} }] in {
+def : PRCTX<"RCTX", 0b0011>;
}
//===----------------------------------------------------------------------===//
@@ -476,8 +561,10 @@ def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>;
def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>;
def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>;
def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>;
+
+//v8.3 CCIDX - extending the CCsIDr number of sets
def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> {
- let Requires = [{ {AArch64::HasV8_3aOps} }];
+ let Requires = [{ {AArch64::FeatureCCIDX} }];
}
def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>;
def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>;
@@ -487,6 +574,9 @@ def : ROSysReg<"AIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b111>;
def : ROSysReg<"DCZID_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b111>;
def : ROSysReg<"ID_PFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b000>;
def : ROSysReg<"ID_PFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b001>;
+def : ROSysReg<"ID_PFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b100> {
+ let Requires = [{ {AArch64::FeatureSpecRestrict} }];
+}
def : ROSysReg<"ID_DFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b010>;
def : ROSysReg<"ID_AFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b011>;
def : ROSysReg<"ID_MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b100>;
@@ -512,9 +602,7 @@ def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>;
def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>;
def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>;
def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>;
-def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010> {
- let Requires = [{ {AArch64::HasV8_2aOps} }];
-}
+def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>;
def : ROSysReg<"MVFR0_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b000>;
def : ROSysReg<"MVFR1_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b001>;
def : ROSysReg<"MVFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b010>;
@@ -584,7 +672,7 @@ def : ROSysReg<"ID_AA64ZFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b100>;
// v8.1a "Limited Ordering Regions" extension-specific system register
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeatureLOR} }] in
def : ROSysReg<"LORID_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b111>;
// v8.2a "RAS extension" registers
@@ -594,6 +682,22 @@ def : ROSysReg<"ERRIDR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b000>;
def : ROSysReg<"ERXFR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b000>;
}
+// v8.5a "random number" registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureRandGen} }] in {
+def : ROSysReg<"RNDR", 0b11, 0b011, 0b0010, 0b0100, 0b000>;
+def : ROSysReg<"RNDRRS", 0b11, 0b011, 0b0010, 0b0100, 0b001>;
+}
+
+// v8.5a Software Context Number registers
+let Requires = [{ {AArch64::FeatureSpecRestrict} }] in {
+def : RWSysReg<"SCXTNUM_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b111>;
+}
+
//===----------------------
// Write-only regs
//===----------------------
@@ -1102,21 +1206,21 @@ def : RWSysReg<"ICH_LR14_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b110>;
def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>;
// v8.1a "Privileged Access Never" extension-specific system registers
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeaturePAN} }] in
def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;
// v8.1a "Limited Ordering Regions" extension-specific system registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+let Requires = [{ {AArch64::FeatureLOR} }] in {
def : RWSysReg<"LORSA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b000>;
def : RWSysReg<"LOREA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b001>;
def : RWSysReg<"LORN_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b010>;
def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>;
}
-// v8.1a "Virtualization hos extensions" system registers
+// v8.1a "Virtualization Host extensions" system registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+let Requires = [{ {AArch64::FeatureVH} }] in {
def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>;
def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>;
@@ -1147,7 +1251,7 @@ def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>;
}
// v8.2a registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeaturePsUAO} }] in
def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>;
// v8.2a "Statistical Profiling extension" registers
@@ -1184,7 +1288,7 @@ def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>;
// v8.3a "Pointer authentication extension" registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_3aOps} }] in {
+let Requires = [{ {AArch64::FeaturePA} }] in {
def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
@@ -1197,8 +1301,8 @@ def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>;
def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>;
}
-let Requires = [{ {AArch64::HasV8_4aOps} }] in {
-
+// v8.4 "Secure Exception Level 2 extension"
+let Requires = [{ {AArch64::FeatureSEL2} }] in {
// v8.4a "Virtualization secure second stage translation" registers
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
@@ -1216,18 +1320,22 @@ def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>;
// v8.4a "Virtualization debug state" registers
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
+} // FeatureSEL2
// v8.4a RAS registers
-// Op0 Op1 CRn CRm Op2
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureRASv8_4} }] in {
def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
def : RWSysReg<"ERXTS_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b111>;
def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>;
def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>;
def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>;
+} // FeatureRASv8_4
// v8.4a MPAM registers
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureMPAM} }] in {
def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>;
def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>;
def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>;
@@ -1244,9 +1352,11 @@ def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>;
def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
+} //FeatureMPAM
-// v8.4a Activitiy monitor registers
+// v8.4a Activity Monitor registers
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureAM} }] in {
def : RWSysReg<"AMCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b000>;
def : ROSysReg<"AMCFGR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b001>;
def : ROSysReg<"AMCGCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b010>;
@@ -1295,6 +1405,7 @@ def : RWSysReg<"AMEVTYPER112_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b100>;
def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>;
def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>;
def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
+} //FeatureAM
// v8.4a Trace Extension registers
//
@@ -1303,19 +1414,24 @@ def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
// but they are already defined above.
//
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureTRACEV8_4} }] in {
def : RWSysReg<"TRFCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b001>;
def : RWSysReg<"TRFCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b001>;
def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>;
+} //FeatureTRACEV8_4
// v8.4a Timining insensitivity of data processing instructions
+// DIT: Data Independent Timing instructions
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureDIT} }] in {
def : RWSysReg<"DIT", 0b11, 0b011, 0b0100, 0b0010, 0b101>;
+} //FeatureDIT
// v8.4a Enhanced Support for Nested Virtualization
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureNV} }] in {
def : RWSysReg<"VNCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b000>;
-
-} // HasV8_4aOps
+} //FeatureNV
// SVE control registers
// Op0 Op1 CRn CRm Op2
@@ -1326,6 +1442,24 @@ def : RWSysReg<"ZCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b000>;
def : RWSysReg<"ZCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b000>;
}
+// V8.5a Spectre mitigation SSBS register
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureSSBS} }] in
+def : RWSysReg<"SSBS", 0b11, 0b011, 0b0100, 0b0010, 0b110>;
+
+// v8.5a Memory Tagging Extension
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureMTE} }] in {
+def : RWSysReg<"TCO", 0b11, 0b011, 0b0100, 0b0010, 0b111>;
+def : RWSysReg<"GCR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b110>;
+def : RWSysReg<"RGSR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b101>;
+def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0110, 0b0101, 0b000>;
+def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0110, 0b0101, 0b000>;
+def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0110, 0b0110, 0b000>;
+def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0110, 0b0110, 0b000>;
+def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0110, 0b0110, 0b001>;
+} // HasMTE
+
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::ProcCyclone} }] in
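
The new BTI searchable-table entries above only record a two-bit target encoding; judging by the CreateBTIHint change later in this same diff, the assembler folds that encoding into the HINT immediate space as (Encoding << 1) | 32. A standalone model of that mapping, assuming the encodings listed in the table:

#include <cstdio>
#include <cstring>

// Map a BTI target name to its HINT immediate: (two-bit encoding << 1) | 32.
unsigned btiHintImmediate(const char *Name) {
  unsigned Encoding = 0; // plain "bti" carries no target bits
  if (std::strcmp(Name, "c") == 0)
    Encoding = 0b01;
  else if (std::strcmp(Name, "j") == 0)
    Encoding = 0b10;
  else if (std::strcmp(Name, "jc") == 0)
    Encoding = 0b11;
  return (Encoding << 1) | 32;
}

int main() {
  std::printf("bti c  -> hint #%u\n", btiHintImmediate("c"));  // 34
  std::printf("bti jc -> hint #%u\n", btiHintImmediate("jc")); // 38
  return 0;
}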
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 120d71381c67..4e016525f7e4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
@@ -123,6 +124,10 @@ static cl::opt<bool>
BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
cl::desc("Relax out of range conditional branches"));
+static cl::opt<bool> EnableCompressJumpTables(
+ "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Use smallest entry possible for jump tables"));
+
// FIXME: Unify control over GlobalMerge.
static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
@@ -141,6 +146,11 @@ static cl::opt<int> EnableGlobalISelAtO(
static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
+ cl::desc("Enable the AAcrh64 branch target pass"),
+ cl::init(true));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -151,19 +161,23 @@ extern "C" void LLVMInitializeAArch64Target() {
initializeAArch64A53Fix835769Pass(*PR);
initializeAArch64A57FPLoadBalancingPass(*PR);
initializeAArch64AdvSIMDScalarPass(*PR);
+ initializeAArch64BranchTargetsPass(*PR);
initializeAArch64CollectLOHPass(*PR);
+ initializeAArch64CompressJumpTablesPass(*PR);
initializeAArch64ConditionalComparesPass(*PR);
initializeAArch64ConditionOptimizerPass(*PR);
initializeAArch64DeadRegisterDefinitionsPass(*PR);
initializeAArch64ExpandPseudoPass(*PR);
initializeAArch64LoadStoreOptPass(*PR);
initializeAArch64SIMDInstrOptPass(*PR);
+ initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);
initializeFalkorHWPFFixPass(*PR);
initializeFalkorMarkStridedAccessesLegacyPass(*PR);
initializeLDTLSCleanupPass(*PR);
+ initializeAArch64SpeculationHardeningPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -206,18 +220,20 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(const Triple &TT,
- Optional<CodeModel::Model> CM,
- bool JIT) {
+static CodeModel::Model
+getEffectiveAArch64CodeModel(const Triple &TT, Optional<CodeModel::Model> CM,
+ bool JIT) {
if (CM) {
- if (*CM != CodeModel::Small && *CM != CodeModel::Large) {
+ if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
+ *CM != CodeModel::Large) {
if (!TT.isOSFuchsia())
report_fatal_error(
- "Only small and large code models are allowed on AArch64");
- else if (CM != CodeModel::Kernel)
- report_fatal_error(
- "Only small, kernel, and large code models are allowed on AArch64");
- }
+ "Only small, tiny and large code models are allowed on AArch64");
+ else if (*CM != CodeModel::Kernel)
+ report_fatal_error("Only small, tiny, kernel, and large code models "
+ "are allowed on AArch64");
+ } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF())
+ report_fatal_error("tiny code model is only supported on ELF");
return *CM;
}
// The default MCJIT memory managers make no guarantees about where they can
@@ -240,7 +256,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T,
computeDataLayout(TT, Options.MCOptions, LittleEndian),
TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(TT, CM, JIT), OL),
+ getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
initAsmInfo();
@@ -249,9 +265,21 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
this->Options.NoTrapAfterNoreturn = true;
}
+ if (getMCAsmInfo()->usesWindowsCFI()) {
+ // Unwinding can get confused if the last instruction in an
+ // exception-handling region (function, funclet, try block, etc.)
+ // is a call.
+ //
+ // FIXME: We could elide the trap if the next instruction would be in
+ // the same region anyway.
+ this->Options.TrapUnreachable = true;
+ }
+
// Enable GlobalISel at or below EnableGlobalISelAt0.
- if (getOptLevel() <= EnableGlobalISelAtO)
+ if (getOptLevel() <= EnableGlobalISelAtO) {
setGlobalISel(true);
+ setGlobalISelAbort(GlobalISelAbortMode::Disable);
+ }
// AArch64 supports the MachineOutliner.
setMachineOutliner(true);
@@ -346,6 +374,7 @@ public:
bool addPreISel() override;
bool addInstSelector() override;
bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
void addPreGlobalInstructionSelect() override;
@@ -393,8 +422,10 @@ void AArch64PassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
// Match interleaved memory accesses to ldN/stN intrinsics.
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ addPass(createInterleavedLoadCombinePass());
addPass(createInterleavedAccessPass());
+ }
if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
@@ -447,6 +478,10 @@ bool AArch64PassConfig::addIRTranslator() {
return false;
}
+void AArch64PassConfig::addPreLegalizeMachineIR() {
+ addPass(createAArch64PreLegalizeCombiner());
+}
+
bool AArch64PassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
@@ -516,12 +551,28 @@ void AArch64PassConfig::addPreSched2() {
if (TM->getOptLevel() != CodeGenOpt::None) {
if (EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
+ }
+
+ // The AArch64SpeculationHardeningPass destroys dominator tree and natural
+ // loop info, which is needed for the FalkorHWPFFixPass and also later on.
+ // Therefore, run the AArch64SpeculationHardeningPass before the
+ // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop
+ // info.
+ addPass(createAArch64SpeculationHardeningPass());
+
+ if (TM->getOptLevel() != CodeGenOpt::None) {
if (EnableFalkorHWPFFix)
addPass(createFalkorHWPFFixPass());
}
}
void AArch64PassConfig::addPreEmitPass() {
+ // Machine Block Placement might have created new opportunities when run
+ // at O3, where the Tail Duplication Threshold is set to 4 instructions.
+ // Run the load/store optimizer once more.
+ if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
+ addPass(createAArch64LoadStoreOptimizationPass());
+
if (EnableA53Fix835769)
addPass(createAArch64A53Fix835769());
// Relax conditional branch instructions if they're otherwise out of
@@ -529,6 +580,12 @@ void AArch64PassConfig::addPreEmitPass() {
if (BranchRelaxation)
addPass(&BranchRelaxationPassID);
+ if (EnableBranchTargets)
+ addPass(createAArch64BranchTargetsPass());
+
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
+ addPass(createAArch64CompressJumpTablesPass());
+
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
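
A standalone restatement of the stricter code-model check introduced above, with CodeModel and the error handling simplified into stand-ins: tiny joins small and large as an explicitly selectable model, Fuchsia additionally accepts kernel, and tiny is rejected outside ELF.

#include <stdexcept>

enum class CodeModel { Tiny, Small, Kernel, Medium, Large };

CodeModel validate(CodeModel CM, bool IsFuchsia, bool IsELF) {
  if (CM != CodeModel::Small && CM != CodeModel::Tiny &&
      CM != CodeModel::Large) {
    if (!IsFuchsia)
      throw std::runtime_error(
          "only small, tiny and large code models are allowed");
    if (CM != CodeModel::Kernel)
      throw std::runtime_error(
          "only small, tiny, kernel, and large code models are allowed");
  } else if (CM == CodeModel::Tiny && !IsELF) {
    throw std::runtime_error("tiny code model is only supported on ELF");
  }
  return CM;
}

int main() {
  validate(CodeModel::Tiny, /*IsFuchsia=*/false, /*IsELF=*/true); // accepted
  return 0;
}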
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 4bc2c060a068..8ae72a7ddb57 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -22,6 +22,9 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
+ // AARCH64 ELF ABI does not define static relocation type for TLS offset
+ // within a module. Do not generate AT_location for TLS variables.
+ SupportDebugThreadLocalLocation = false;
}
AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile()
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 96e751e86971..a256cb7c9215 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -659,11 +659,14 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
- if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+ if (!UseMaskForCond && !UseMaskForGaps &&
+ Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
@@ -676,7 +679,8 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
@@ -945,9 +949,20 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- if (Kind == TTI::SK_Transpose || Kind == TTI::SK_Select ||
- Kind == TTI::SK_PermuteSingleSrc) {
+ if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
+ Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
static const CostTblEntry ShuffleTbl[] = {
+ // Broadcast shuffle kinds can be performed with 'dup'.
+ { TTI::SK_Broadcast, MVT::v8i8, 1 },
+ { TTI::SK_Broadcast, MVT::v16i8, 1 },
+ { TTI::SK_Broadcast, MVT::v4i16, 1 },
+ { TTI::SK_Broadcast, MVT::v8i16, 1 },
+ { TTI::SK_Broadcast, MVT::v2i32, 1 },
+ { TTI::SK_Broadcast, MVT::v4i32, 1 },
+ { TTI::SK_Broadcast, MVT::v2i64, 1 },
+ { TTI::SK_Broadcast, MVT::v2f32, 1 },
+ { TTI::SK_Broadcast, MVT::v4f32, 1 },
+ { TTI::SK_Broadcast, MVT::v2f64, 1 },
// Transpose shuffle kinds can be performed with 'trn1/trn2' and
// 'zip1/zip2' instructions.
{ TTI::SK_Transpose, MVT::v8i8, 1 },
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index c056a7d2428b..08c1a8924220 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -146,7 +146,9 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
bool
shouldConsiderAddressTypePromotion(const Instruction &I,
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 30a9a08f2346..6cc9b67e4d27 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -164,6 +165,7 @@ private:
OperandVector &Operands);
bool parseDirectiveArch(SMLoc L);
+ bool parseDirectiveArchExtension(SMLoc L);
bool parseDirectiveCPU(SMLoc L);
bool parseDirectiveInst(SMLoc L);
@@ -174,6 +176,8 @@ private:
bool parseDirectiveReq(StringRef Name, SMLoc L);
bool parseDirectiveUnreq(SMLoc L);
+ bool parseDirectiveCFINegateRAState();
+ bool parseDirectiveCFIBKeyFrame();
bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
SmallVectorImpl<SMLoc> &Loc);
@@ -200,6 +204,7 @@ private:
template <bool IsSVEPrefetch = false>
OperandMatchResultTy tryParsePrefetch(OperandVector &Operands);
OperandMatchResultTy tryParsePSBHint(OperandVector &Operands);
+ OperandMatchResultTy tryParseBTIHint(OperandVector &Operands);
OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands);
OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands);
template<bool AddFPZeroAsLiteral>
@@ -282,6 +287,7 @@ private:
k_FPImm,
k_Barrier,
k_PSBHint,
+ k_BTIHint,
} Kind;
SMLoc StartLoc, EndLoc;
@@ -385,6 +391,12 @@ private:
unsigned Val;
};
+ struct BTIHintOp {
+ const char *Data;
+ unsigned Length;
+ unsigned Val;
+ };
+
struct ExtendOp {
unsigned Val;
};
@@ -403,6 +415,7 @@ private:
struct SysCRImmOp SysCRImm;
struct PrefetchOp Prefetch;
struct PSBHintOp PSBHint;
+ struct BTIHintOp BTIHint;
struct ShiftExtendOp ShiftExtend;
};
@@ -457,6 +470,9 @@ public:
case k_PSBHint:
PSBHint = o.PSBHint;
break;
+ case k_BTIHint:
+ BTIHint = o.BTIHint;
+ break;
case k_ShiftExtend:
ShiftExtend = o.ShiftExtend;
break;
@@ -568,6 +584,16 @@ public:
return StringRef(PSBHint.Data, PSBHint.Length);
}
+ unsigned getBTIHint() const {
+ assert(Kind == k_BTIHint && "Invalid access!");
+ return BTIHint.Val;
+ }
+
+ StringRef getBTIHintName() const {
+ assert(Kind == k_BTIHint && "Invalid access!");
+ return StringRef(BTIHint.Data, BTIHint.Length);
+ }
+
StringRef getPrefetchName() const {
assert(Kind == k_Prefetch && "Invalid access!");
return StringRef(Prefetch.Data, Prefetch.Length);
@@ -658,7 +684,7 @@ public:
return DiagnosticPredicateTy::NearMatch;
}
- bool isSymbolicUImm12Offset(const MCExpr *Expr, unsigned Scale) const {
+ bool isSymbolicUImm12Offset(const MCExpr *Expr) const {
AArch64MCExpr::VariantKind ELFRefKind;
MCSymbolRefExpr::VariantKind DarwinRefKind;
int64_t Addend;
@@ -683,7 +709,7 @@ public:
// Note that we don't range-check the addend. It's adjusted modulo page
// size when converted, so there is no "out of range" condition when using
// @pageoff.
- return Addend >= 0 && (Addend % Scale) == 0;
+ return true;
} else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF ||
DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) {
// @gotpageoff/@tlvppageoff can only be used directly, not with an addend.
@@ -699,7 +725,7 @@ public:
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
- return isSymbolicUImm12Offset(getImm(), Scale);
+ return isSymbolicUImm12Offset(getImm());
int64_t Val = MCE->getValue();
return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000;
@@ -901,7 +927,7 @@ public:
for (unsigned i = 0; i != AllowedModifiers.size(); ++i) {
if (ELFRefKind == AllowedModifiers[i])
- return Addend == 0;
+ return true;
}
return false;
@@ -996,7 +1022,8 @@ public:
if (!isSysReg()) return false;
return (SysReg.PStateField == AArch64PState::PAN ||
SysReg.PStateField == AArch64PState::DIT ||
- SysReg.PStateField == AArch64PState::UAO);
+ SysReg.PStateField == AArch64PState::UAO ||
+ SysReg.PStateField == AArch64PState::SSBS);
}
bool isSystemPStateFieldWithImm0_15() const {
@@ -1185,6 +1212,7 @@ public:
bool isSysCR() const { return Kind == k_SysCR; }
bool isPrefetch() const { return Kind == k_Prefetch; }
bool isPSBHint() const { return Kind == k_PSBHint; }
+ bool isBTIHint() const { return Kind == k_BTIHint; }
bool isShiftExtend() const { return Kind == k_ShiftExtend; }
bool isShifter() const {
if (!isShiftExtend())
@@ -1702,6 +1730,11 @@ public:
Inst.addOperand(MCOperand::createImm(getPSBHint()));
}
+ void addBTIHintOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getBTIHint()));
+ }
+
void addShifterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
unsigned Imm =
@@ -1950,6 +1983,19 @@ public:
return Op;
}
+ static std::unique_ptr<AArch64Operand> CreateBTIHint(unsigned Val,
+ StringRef Str,
+ SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_BTIHint, Ctx);
+ Op->BTIHint.Val = Val << 1 | 32;
+ Op->BTIHint.Data = Str.data();
+ Op->BTIHint.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static std::unique_ptr<AArch64Operand>
CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
@@ -2030,6 +2076,9 @@ void AArch64Operand::print(raw_ostream &OS) const {
if (!getShiftExtendAmount() && !hasShiftExtendAmount())
break;
LLVM_FALLTHROUGH;
+ case k_BTIHint:
+ OS << getBTIHintName();
+ break;
case k_ShiftExtend:
OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
<< getShiftExtendAmount();
@@ -2395,6 +2444,29 @@ AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
return MatchOperand_Success;
}
+/// tryParseBTIHint - Try to parse a BTI operand, mapped to Hint command
+OperandMatchResultTy
+AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ auto BTI = AArch64BTIHint::lookupBTIByName(Tok.getString());
+ if (!BTI) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(AArch64Operand::CreateBTIHint(
+ BTI->Encoding, Tok.getString(), S, getContext()));
+ return MatchOperand_Success;
+}
+
/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
/// instruction.
OperandMatchResultTy
@@ -2453,17 +2525,34 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Expr;
- const AsmToken &Tok = getParser().getTok();
- if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) {
- if (getParser().parseExpression(Expr))
- return MatchOperand_ParseFail;
+ // Leave anything with a bracket to the default for SVE
+ if (getParser().getTok().is(AsmToken::LBrac))
+ return MatchOperand_NoMatch;
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
+ if (getParser().getTok().is(AsmToken::Hash))
+ getParser().Lex(); // Eat hash token.
- return MatchOperand_Success;
+ if (parseSymbolicImmVal(Expr))
+ return MatchOperand_ParseFail;
+
+ AArch64MCExpr::VariantKind ELFRefKind;
+ MCSymbolRefExpr::VariantKind DarwinRefKind;
+ int64_t Addend;
+ if (classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
+ if (DarwinRefKind == MCSymbolRefExpr::VK_None &&
+ ELFRefKind == AArch64MCExpr::VK_INVALID) {
+ // No modifier was specified at all; this is the syntax for an ELF basic
+ // ADR relocation (unfortunately).
+ Expr = AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS, getContext());
+ } else {
+ Error(S, "unexpected adr label");
+ return MatchOperand_ParseFail;
+ }
}
- return MatchOperand_NoMatch;
+
+ SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
+ return MatchOperand_Success;
}
/// tryParseFPImm - A floating point immediate expression operand.
@@ -2723,6 +2812,34 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
return MatchOperand_Success;
}
+static const struct Extension {
+ const char *Name;
+ const FeatureBitset Features;
+} ExtensionMap[] = {
+ {"crc", {AArch64::FeatureCRC}},
+ {"sm4", {AArch64::FeatureSM4}},
+ {"sha3", {AArch64::FeatureSHA3}},
+ {"sha2", {AArch64::FeatureSHA2}},
+ {"aes", {AArch64::FeatureAES}},
+ {"crypto", {AArch64::FeatureCrypto}},
+ {"fp", {AArch64::FeatureFPARMv8}},
+ {"simd", {AArch64::FeatureNEON}},
+ {"ras", {AArch64::FeatureRAS}},
+ {"lse", {AArch64::FeatureLSE}},
+ {"predres", {AArch64::FeaturePredRes}},
+ {"ccdp", {AArch64::FeatureCacheDeepPersist}},
+ {"mte", {AArch64::FeatureMTE}},
+ {"tlb-rmi", {AArch64::FeatureTLB_RMI}},
+ {"pan-rwv", {AArch64::FeaturePAN_RWV}},
+ {"ccpp", {AArch64::FeatureCCPP}},
+ {"sve", {AArch64::FeatureSVE}},
+ // FIXME: Unsupported extensions
+ {"pan", {}},
+ {"lor", {}},
+ {"rdma", {}},
+ {"profile", {}},
+};
+
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
if (FBS[AArch64::HasV8_1aOps])
Str += "ARMv8.1a";
@@ -2732,8 +2849,18 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv8.3a";
else if (FBS[AArch64::HasV8_4aOps])
Str += "ARMv8.4a";
- else
- Str += "(unknown)";
+ else if (FBS[AArch64::HasV8_5aOps])
+ Str += "ARMv8.5a";
+ else {
+ auto ext = std::find_if(std::begin(ExtensionMap),
+ std::end(ExtensionMap),
+ [&](const Extension& e)
+ // Use & in case multiple features are enabled
+ { return (FBS & e.Features) != FeatureBitset(); }
+ );
+
+ Str += ext != std::end(ExtensionMap) ? ext->Name : "(unknown)";
+ }
}
void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands,
@@ -2812,6 +2939,23 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
return TokError(Str.c_str());
}
createSysAlias(TLBI->Encoding, Operands, S);
+ } else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp") {
+ const AArch64PRCTX::PRCTX *PRCTX = AArch64PRCTX::lookupPRCTXByName(Op);
+ if (!PRCTX)
+ return TokError("invalid operand for prediction restriction instruction");
+ else if (!PRCTX->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str(
+ Mnemonic.upper() + std::string(PRCTX->Name) + " requires ");
+ setRequiredFeatureString(PRCTX->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
+ }
+ uint16_t PRCTX_Op2 =
+ Mnemonic == "cfp" ? 4 :
+ Mnemonic == "dvp" ? 5 :
+ Mnemonic == "cpp" ? 7 :
+ 0;
+ assert(PRCTX_Op2 && "Invalid mnemonic for prediction restriction instruction");
+ createSysAlias(PRCTX->Encoding << 3 | PRCTX_Op2 , Operands, S);
}
Parser.Lex(); // Eat operand.
@@ -3630,8 +3774,10 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- // IC, DC, AT, and TLBI instructions are aliases for the SYS instruction.
- if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi")
+ // IC, DC, AT, TLBI and Prediction invalidation instructions are aliases for
+ // the SYS instruction.
+ if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi" ||
+ Head == "cfp" || Head == "dvp" || Head == "cpp")
return parseSysAlias(Head, NameLoc, Operands);
Operands.push_back(
@@ -3685,13 +3831,9 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- // Read the first operand.
- if (parseOperand(Operands, false, false)) {
- return true;
- }
- unsigned N = 2;
- while (parseOptionalToken(AsmToken::Comma)) {
+ unsigned N = 1;
+ do {
// Parse and remember the operand.
if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) ||
(N == 3 && condCodeThirdOperand) ||
@@ -3719,7 +3861,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
AArch64Operand::CreateToken("!", false, ELoc, getContext()));
++N;
- }
+ } while (parseOptionalToken(AsmToken::Comma));
}
if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
@@ -3956,6 +4098,15 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
"unpredictable STXP instruction, status is also a source");
break;
}
+ case AArch64::LDGV: {
+ unsigned Rt = Inst.getOperand(0).getReg();
+ unsigned Rn = Inst.getOperand(1).getReg();
+ if (RI->isSubRegisterEq(Rt, Rn)) {
+ return Error(Loc[0],
+ "unpredictable LDGV instruction, writeback register is also "
+ "the target register");
+ }
+ }
}
@@ -4090,6 +4241,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "index must be an integer in range [-128, 127].");
case Match_InvalidMemoryIndexedSImm9:
return Error(Loc, "index must be an integer in range [-256, 255].");
+ case Match_InvalidMemoryIndexed16SImm9:
+ return Error(Loc, "index must be a multiple of 16 in range [-4096, 4080].");
case Match_InvalidMemoryIndexed8SImm10:
return Error(Loc, "index must be a multiple of 8 in range [-4096, 4088].");
case Match_InvalidMemoryIndexed4SImm7:
@@ -4106,6 +4259,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "index must be a multiple of 2 in range [0, 62].");
case Match_InvalidMemoryIndexed8UImm6:
return Error(Loc, "index must be a multiple of 8 in range [0, 504].");
+ case Match_InvalidMemoryIndexed16UImm6:
+ return Error(Loc, "index must be a multiple of 16 in range [0, 1008].");
case Match_InvalidMemoryIndexed4UImm6:
return Error(Loc, "index must be a multiple of 4 in range [0, 252].");
case Match_InvalidMemoryIndexed2UImm6:
@@ -4742,10 +4897,12 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidMemoryIndexed2UImm6:
case Match_InvalidMemoryIndexed4UImm6:
case Match_InvalidMemoryIndexed8UImm6:
+ case Match_InvalidMemoryIndexed16UImm6:
case Match_InvalidMemoryIndexedSImm6:
case Match_InvalidMemoryIndexedSImm5:
case Match_InvalidMemoryIndexedSImm8:
case Match_InvalidMemoryIndexedSImm9:
+ case Match_InvalidMemoryIndexed16SImm9:
case Match_InvalidMemoryIndexed8SImm10:
case Match_InvalidImm0_1:
case Match_InvalidImm0_7:
@@ -4874,6 +5031,12 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveUnreq(Loc);
else if (IDVal == ".inst")
parseDirectiveInst(Loc);
+ else if (IDVal == ".cfi_negate_ra_state")
+ parseDirectiveCFINegateRAState();
+ else if (IDVal == ".cfi_b_key_frame")
+ parseDirectiveCFIBKeyFrame();
+ else if (IDVal == ".arch_extension")
+ parseDirectiveArchExtension(Loc);
else if (IsMachO) {
if (IDVal == MCLOHDirectiveName())
parseDirectiveLOH(IDVal, Loc);
@@ -4884,28 +5047,6 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
}
-static const struct {
- const char *Name;
- const FeatureBitset Features;
-} ExtensionMap[] = {
- { "crc", {AArch64::FeatureCRC} },
- { "sm4", {AArch64::FeatureSM4} },
- { "sha3", {AArch64::FeatureSHA3} },
- { "sha2", {AArch64::FeatureSHA2} },
- { "aes", {AArch64::FeatureAES} },
- { "crypto", {AArch64::FeatureCrypto} },
- { "fp", {AArch64::FeatureFPARMv8} },
- { "simd", {AArch64::FeatureNEON} },
- { "ras", {AArch64::FeatureRAS} },
- { "lse", {AArch64::FeatureLSE} },
-
- // FIXME: Unsupported extensions
- { "pan", {} },
- { "lor", {} },
- { "rdma", {} },
- { "profile", {} },
-};
-
static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
SmallVector<StringRef, 4> &RequestedExtensions) {
const bool NoCrypto =
@@ -4927,6 +5068,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
RequestedExtensions.push_back("aes");
break;
case AArch64::ArchKind::ARMV8_4A:
+ case AArch64::ArchKind::ARMV8_5A:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
RequestedExtensions.push_back("sha2");
@@ -4945,6 +5087,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
RequestedExtensions.push_back("noaes");
break;
case AArch64::ArchKind::ARMV8_4A:
+ case AArch64::ArchKind::ARMV8_5A:
RequestedExtensions.push_back("nosm4");
RequestedExtensions.push_back("nosha3");
RequestedExtensions.push_back("nosha2");
@@ -5014,6 +5157,50 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
return false;
}
+/// parseDirectiveArchExtension
+/// ::= .arch_extension [no]feature
+bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLexer().getLoc(), "expected architecture extension name");
+
+ const AsmToken &Tok = Parser.getTok();
+ StringRef Name = Tok.getString();
+ SMLoc ExtLoc = Tok.getLoc();
+ Lex();
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.arch_extension' directive"))
+ return true;
+
+ bool EnableFeature = true;
+ if (Name.startswith_lower("no")) {
+ EnableFeature = false;
+ Name = Name.substr(2);
+ }
+
+ MCSubtargetInfo &STI = copySTI();
+ FeatureBitset Features = STI.getFeatureBits();
+ for (const auto &Extension : ExtensionMap) {
+ if (Extension.Name != Name)
+ continue;
+
+ if (Extension.Features.none())
+ return Error(ExtLoc, "unsupported architectural extension: " + Name);
+
+ FeatureBitset ToggleFeatures = EnableFeature
+ ? (~Features & Extension.Features)
+ : (Features & Extension.Features);
+ uint64_t Features =
+ ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
+ setAvailableFeatures(Features);
+ return false;
+ }
+
+ return Error(ExtLoc, "unknown architectural extension: " + Name);
+}
+
static SMLoc incrementLoc(SMLoc L, int Offset) {
return SMLoc::getFromPointer(L.getPointer() + Offset);
}
@@ -5267,6 +5454,23 @@ bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
return false;
}
+bool AArch64AsmParser::parseDirectiveCFINegateRAState() {
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
+ getStreamer().EmitCFINegateRAState();
+ return false;
+}
+
+/// parseDirectiveCFIBKeyFrame
+/// ::= .cfi_b_key_frame
+bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.cfi_b_key_frame'"))
+ return true;
+ getStreamer().EmitCFIBKeyFrame();
+ return false;
+}
+
bool
AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
@@ -5288,28 +5492,20 @@ AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
return true;
}
- const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr);
- if (!BE)
- return false;
-
- SE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
- if (!SE)
- return false;
- DarwinRefKind = SE->getKind();
-
- if (BE->getOpcode() != MCBinaryExpr::Add &&
- BE->getOpcode() != MCBinaryExpr::Sub)
+ // Check that it looks like a symbol + an addend
+ MCValue Res;
+ bool Relocatable = Expr->evaluateAsRelocatable(Res, nullptr, nullptr);
+ if (!Relocatable || Res.getSymB())
return false;
- // See if the addend is a constant, otherwise there's more going
- // on here than we can deal with.
- auto AddendExpr = dyn_cast<MCConstantExpr>(BE->getRHS());
- if (!AddendExpr)
+ // Treat expressions with an ELFRefKind (like ":abs_g1:3", or
+ // ":abs_g1:x" where x is constant) as symbolic even if there is no symbol.
+ if (!Res.getSymA() && ELFRefKind == AArch64MCExpr::VK_INVALID)
return false;
- Addend = AddendExpr->getValue();
- if (BE->getOpcode() == MCBinaryExpr::Sub)
- Addend = -Addend;
+ if (Res.getSymA())
+ DarwinRefKind = Res.getSymA()->getKind();
+ Addend = Res.getConstant();
// It's some symbol reference + a constant addend, but really
// shouldn't use both Darwin and ELF syntax.
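For readers following the `.arch_extension` directive handling added earlier in this file's changes: below is a minimal, standalone sketch of the `[no]`-prefix split it performs before consulting the extension table. The helper name is made up for illustration, and the real parser also folds case, which is omitted here.

#include <cassert>
#include <string>
#include <utility>

// Split an ".arch_extension" operand such as "nosimd" into the extension
// name and an enable/disable flag, loosely mirroring the
// startswith_lower("no") handling in parseDirectiveArchExtension.
static std::pair<std::string, bool> splitArchExtension(std::string Name) {
  bool Enable = true;
  if (Name.size() > 2 && Name.compare(0, 2, "no") == 0) {
    Enable = false;
    Name = Name.substr(2);
  }
  return {Name, Enable};
}

int main() {
  assert(splitArchExtension("crc") == std::make_pair(std::string("crc"), true));
  assert(splitArchExtension("nosimd") ==
         std::make_pair(std::string("simd"), false));
}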
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index cef0ff346448..4102f1eb5cc1 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -12,7 +12,6 @@
#include "AArch64Disassembler.h"
#include "AArch64ExternalSymbolizer.h"
-#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
@@ -20,6 +19,8 @@
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -159,8 +160,8 @@ static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
const void *Decoder);
static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBaseAddSubImm(MCInst &Inst, uint32_t insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
@@ -219,6 +220,11 @@ static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeLoadAllocTagArrayInstruction(MCInst &Inst,
+ uint32_t insn,
+ uint64_t address,
+ const void* Decoder);
+
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
case MCDisassembler::Success:
@@ -1402,6 +1408,8 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
case AArch64::STPSpost:
case AArch64::LDPSpre:
case AArch64::STPSpre:
+ case AArch64::STGPpre:
+ case AArch64::STGPpost:
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
break;
}
@@ -1415,6 +1423,8 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
case AArch64::LDPXpre:
case AArch64::STPXpre:
case AArch64::LDPSWpre:
+ case AArch64::STGPpre:
+ case AArch64::STGPpost:
NeedsDisjointWritebackTransfer = true;
LLVM_FALLTHROUGH;
case AArch64::LDNPXi:
@@ -1422,6 +1432,7 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
case AArch64::LDPXi:
case AArch64::STPXi:
case AArch64::LDPSWi:
+ case AArch64::STGPi:
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder);
break;
@@ -1652,8 +1663,8 @@ static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeBaseAddSubImm(MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
+static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
+ uint64_t Addr, const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
unsigned Imm = fieldFromInstruction(insn, 10, 14);
@@ -1711,11 +1722,17 @@ static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn,
uint64_t op1 = fieldFromInstruction(insn, 16, 3);
uint64_t op2 = fieldFromInstruction(insn, 5, 3);
uint64_t crm = fieldFromInstruction(insn, 8, 4);
-
uint64_t pstate_field = (op1 << 3) | op2;
+ switch (pstate_field) {
+ case 0x01: // XAFlag
+ case 0x02: // AXFlag
+ return Fail;
+ }
+
if ((pstate_field == AArch64PState::PAN ||
- pstate_field == AArch64PState::UAO) && crm > 1)
+ pstate_field == AArch64PState::UAO ||
+ pstate_field == AArch64PState::SSBS) && crm > 1)
return Fail;
Inst.addOperand(MCOperand::createImm(pstate_field));
@@ -1835,3 +1852,25 @@ static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
Inst.addOperand(MCOperand::createImm(Imm + 1));
return Success;
}
+
+static DecodeStatus DecodeLoadAllocTagArrayInstruction(MCInst &Inst,
+ uint32_t insn,
+ uint64_t address,
+ const void* Decoder) {
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ unsigned Rt = fieldFromInstruction(insn, 0, 5);
+
+ // Outputs
+ DecodeGPR64spRegisterClass(Inst, Rn, address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt, address, Decoder);
+
+ // Input (Rn again)
+ Inst.addOperand(Inst.getOperand(0));
+
+  // Do this post-decode, since the raw register numbers for xzr and sp are the same
+ if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) {
+ return SoftFail;
+ } else {
+ return Success;
+ }
+}
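The new DecodeLoadAllocTagArrayInstruction above reads Rn from bits 5..9 and Rt from bits 0..4 of the encoding word. A tiny standalone sketch of that field extraction follows; the helper name is illustrative, and the sample word is arbitrary rather than a real LDGV encoding.

#include <cassert>
#include <cstdint>

// Extract 'Width' bits starting at bit 'Start' from a 32-bit encoding,
// the same operation fieldFromInstruction performs in the decoder.
static uint32_t extractField(uint32_t Insn, unsigned Start, unsigned Width) {
  return (Insn >> Start) & ((1u << Width) - 1u);
}

int main() {
  uint32_t Insn = 0xd9e00be1;              // arbitrary 32-bit word
  assert(extractField(Insn, 0, 5) == 1);   // Rt field
  assert(extractField(Insn, 5, 5) == 31);  // Rn field (31 decodes as SP here)
}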
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 6e64fc9347b9..342655a29b1d 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
#include "AArch64ExternalSymbolizer.h"
-#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -60,6 +60,8 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
bool IsBranch, uint64_t Offset, uint64_t InstSize) {
+ if (!SymbolLookUp)
+ return false;
// FIXME: This method shares a lot of code with
// MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible to
// refactor the MCExternalSymbolizer interface to allow more of this
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 26e41215afc6..dcf2dd251149 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -775,8 +775,33 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
if (CnVal == 7) {
switch (CmVal) {
default: return false;
+ // Maybe IC, maybe Prediction Restriction
+ case 1:
+ switch (Op1Val) {
+ default: return false;
+ case 0: goto Search_IC;
+ case 3: goto Search_PRCTX;
+ }
+ // Prediction Restriction aliases
+ case 3: {
+ Search_PRCTX:
+ const AArch64PRCTX::PRCTX *PRCTX = AArch64PRCTX::lookupPRCTXByEncoding(Encoding >> 3);
+ if (!PRCTX || !PRCTX->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = PRCTX->NeedsReg;
+ switch (Op2Val) {
+ default: return false;
+ case 4: Ins = "cfp\t"; break;
+ case 5: Ins = "dvp\t"; break;
+ case 7: Ins = "cpp\t"; break;
+ }
+ Name = std::string(PRCTX->Name);
+ }
+ break;
// IC aliases
- case 1: case 5: {
+ case 5: {
+ Search_IC:
const AArch64IC::IC *IC = AArch64IC::lookupICByEncoding(Encoding);
if (!IC || !IC->haveFeatures(STI.getFeatureBits()))
return false;
@@ -787,7 +812,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
}
break;
// DC aliases
- case 4: case 6: case 10: case 11: case 12: case 14:
+ case 4: case 6: case 10: case 11: case 12: case 13: case 14:
{
const AArch64DC::DC *DC = AArch64DC::lookupDCByEncoding(Encoding);
if (!DC || !DC->haveFeatures(STI.getFeatureBits()))
@@ -1097,6 +1122,17 @@ void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
O << '#' << formatImm(psbhintop);
}
+void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned btihintop = (MI->getOperand(OpNum).getImm() ^ 32) >> 1;
+ auto BTI = AArch64BTIHint::lookupBTIByEncoding(btihintop);
+ if (BTI)
+ O << BTI->Name;
+ else
+ O << '#' << formatImm(btihintop);
+}
+
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
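The `(imm ^ 32) >> 1` computation in printBTIHintOp above folds the BTI family of HINT immediates (32, 34, 36, 38 for bti, bti c, bti j, bti jc) onto a small index before the table lookup. A quick arithmetic check of that fold is sketched below; which index maps to which alias name is left to the TableGen'd table.

#include <cassert>

// Map the BTI HINT immediates onto the 0..3 index space used for the
// encoding-based lookup in the printer above.
static unsigned btiHintIndex(unsigned HintImm) {
  return (HintImm ^ 32) >> 1;
}

int main() {
  assert(btiHintIndex(32) == 0); // hint #32 == bti
  assert(btiHintIndex(34) == 1); // hint #34 == bti c
  assert(btiHintIndex(36) == 2); // hint #36 == bti j
  assert(btiHintIndex(38) == 3); // hint #38 == bti jc
}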
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 8dc9264f94a1..4e9982f5b7be 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -131,6 +131,9 @@ protected:
void printPSBHintOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBTIHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
void printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 62644ab2f457..688ca755d0b5 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -342,27 +343,23 @@ static inline bool isValidDecodeLogicalImmediate(uint64_t val,
//
static inline float getFPImmFloat(unsigned Imm) {
// We expect an 8-bit binary encoding of a floating-point number here.
- union {
- uint32_t I;
- float F;
- } FPUnion;
uint8_t Sign = (Imm >> 7) & 0x1;
uint8_t Exp = (Imm >> 4) & 0x7;
uint8_t Mantissa = Imm & 0xf;
- // 8-bit FP iEEEE Float Encoding
+ // 8-bit FP IEEE Float Encoding
// abcd efgh aBbbbbbc defgh000 00000000 00000000
//
// where B = NOT(b);
- FPUnion.I = 0;
- FPUnion.I |= Sign << 31;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
- FPUnion.I |= (Exp & 0x3) << 23;
- FPUnion.I |= Mantissa << 19;
- return FPUnion.F;
+ uint32_t I = 0;
+ I |= Sign << 31;
+ I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ I |= (Exp & 0x3) << 23;
+ I |= Mantissa << 19;
+ return bit_cast<float>(I);
}
/// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
@@ -757,12 +754,8 @@ static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) {
/// Returns true if Imm is the concatenation of a repeating pattern of type T.
template <typename T>
static inline bool isSVEMaskOfIdenticalElements(int64_t Imm) {
- union {
- int64_t Whole;
- T Parts[sizeof(int64_t)/sizeof(T)];
- } Vec { Imm };
-
- return all_of(Vec.Parts, [Vec](T Elem) { return Elem == Vec.Parts[0]; });
+ auto Parts = bit_cast<std::array<T, sizeof(int64_t) / sizeof(T)>>(Imm);
+ return all_of(Parts, [&](T Elem) { return Elem == Parts[0]; });
}
/// Returns true if Imm is valid for CPY/DUP.
@@ -790,29 +783,20 @@ static inline bool isSVEAddSubImm(int64_t Imm) {
/// Return true if Imm is valid for DUPM and has no single CPY/DUP equivalent.
static inline bool isSVEMoveMaskPreferredLogicalImmediate(int64_t Imm) {
- union {
- int64_t D;
- int32_t S[2];
- int16_t H[4];
- int8_t B[8];
- } Vec = { Imm };
-
- if (isSVECpyImm<int64_t>(Vec.D))
+ if (isSVECpyImm<int64_t>(Imm))
return false;
- if (isSVEMaskOfIdenticalElements<int32_t>(Imm) &&
- isSVECpyImm<int32_t>(Vec.S[0]))
- return false;
+ auto S = bit_cast<std::array<int32_t, 2>>(Imm);
+ auto H = bit_cast<std::array<int16_t, 4>>(Imm);
+ auto B = bit_cast<std::array<int8_t, 8>>(Imm);
- if (isSVEMaskOfIdenticalElements<int16_t>(Imm) &&
- isSVECpyImm<int16_t>(Vec.H[0]))
+ if (isSVEMaskOfIdenticalElements<int32_t>(Imm) && isSVECpyImm<int32_t>(S[0]))
return false;
-
- if (isSVEMaskOfIdenticalElements<int8_t>(Imm) &&
- isSVECpyImm<int8_t>(Vec.B[0]))
+ if (isSVEMaskOfIdenticalElements<int16_t>(Imm) && isSVECpyImm<int16_t>(H[0]))
return false;
-
- return isLogicalImmediate(Vec.D, 64);
+ if (isSVEMaskOfIdenticalElements<int8_t>(Imm) && isSVECpyImm<int8_t>(B[0]))
+ return false;
+ return isLogicalImmediate(Imm, 64);
}
inline static bool isAnyMOVZMovAlias(uint64_t Value, int RegWidth) {
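getFPImmFloat above now builds the 32-bit pattern in a plain integer and bit_casts it instead of type-punning through a union. The same expansion can be sketched standalone, using std::memcpy as the well-defined bit cast:

#include <cassert>
#include <cstdint>
#include <cstring>

// Expand an 8-bit AArch64 FP immediate encoding (abcdefgh) into an IEEE
// single: sign = a, exponent derived from bcd, mantissa = efgh.
static float decodeFPImm8(unsigned Imm) {
  uint32_t Sign = (Imm >> 7) & 0x1;
  uint32_t Exp = (Imm >> 4) & 0x7;
  uint32_t Mantissa = Imm & 0xf;

  uint32_t I = 0;
  I |= Sign << 31;
  I |= ((Exp & 0x4) != 0 ? 0u : 1u) << 30;   // B = NOT(b)
  I |= ((Exp & 0x4) != 0 ? 0x1fu : 0u) << 25;
  I |= (Exp & 0x3) << 23;
  I |= Mantissa << 19;

  float F;
  std::memcpy(&F, &I, sizeof(F)); // well-defined bit cast
  return F;
}

int main() {
  // Encoding 0x70 (sign 0, exp 0b111, mantissa 0) is 1.0.
  assert(decodeFPImm8(0x70) == 1.0f);
}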
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 856946555198..ed89d991d9fb 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -19,6 +19,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
@@ -109,11 +110,11 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case FK_Data_1:
return 1;
- case AArch64::fixup_aarch64_movw:
case FK_Data_2:
case FK_SecRel_2:
return 2;
+ case AArch64::fixup_aarch64_movw:
case AArch64::fixup_aarch64_pcrel_branch14:
case AArch64::fixup_aarch64_add_imm12:
case AArch64::fixup_aarch64_ldst_imm12_scale1:
@@ -144,9 +145,9 @@ static unsigned AdrImmBits(unsigned Value) {
return (hi19 << 5) | (lo2 << 29);
}
-static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- MCContext &Ctx, const Triple &TheTriple,
- bool IsResolved) {
+static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
+ uint64_t Value, MCContext &Ctx,
+ const Triple &TheTriple, bool IsResolved) {
unsigned Kind = Fixup.getKind();
int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
@@ -214,10 +215,79 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
if (Value & 0xf)
Ctx.reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
return Value >> 4;
- case AArch64::fixup_aarch64_movw:
- Ctx.reportError(Fixup.getLoc(),
- "no resolvable MOVZ/MOVK fixups supported yet");
+ case AArch64::fixup_aarch64_movw: {
+ AArch64MCExpr::VariantKind RefKind =
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ if (AArch64MCExpr::getSymbolLoc(RefKind) != AArch64MCExpr::VK_ABS &&
+ AArch64MCExpr::getSymbolLoc(RefKind) != AArch64MCExpr::VK_SABS) {
+ // VK_GOTTPREL, VK_TPREL, VK_DTPREL are movw fixups, but they can't
+ // ever be resolved in the assembler.
+ Ctx.reportError(Fixup.getLoc(),
+ "relocation for a thread-local variable points to an "
+ "absolute symbol");
+ return Value;
+ }
+
+ if (!IsResolved) {
+ // FIXME: Figure out when this can actually happen, and verify our
+ // behavior.
+ Ctx.reportError(Fixup.getLoc(), "unresolved movw fixup not yet "
+ "implemented");
+ return Value;
+ }
+
+ if (AArch64MCExpr::getSymbolLoc(RefKind) == AArch64MCExpr::VK_SABS) {
+ switch (AArch64MCExpr::getAddressFrag(RefKind)) {
+ case AArch64MCExpr::VK_G0:
+ break;
+ case AArch64MCExpr::VK_G1:
+ SignedValue = SignedValue >> 16;
+ break;
+ case AArch64MCExpr::VK_G2:
+ SignedValue = SignedValue >> 32;
+ break;
+ case AArch64MCExpr::VK_G3:
+ SignedValue = SignedValue >> 48;
+ break;
+ default:
+ llvm_unreachable("Variant kind doesn't correspond to fixup");
+ }
+
+ } else {
+ switch (AArch64MCExpr::getAddressFrag(RefKind)) {
+ case AArch64MCExpr::VK_G0:
+ break;
+ case AArch64MCExpr::VK_G1:
+ Value = Value >> 16;
+ break;
+ case AArch64MCExpr::VK_G2:
+ Value = Value >> 32;
+ break;
+ case AArch64MCExpr::VK_G3:
+ Value = Value >> 48;
+ break;
+ default:
+ llvm_unreachable("Variant kind doesn't correspond to fixup");
+ }
+ }
+
+ if (RefKind & AArch64MCExpr::VK_NC) {
+ Value &= 0xFFFF;
+ }
+ else if (RefKind & AArch64MCExpr::VK_SABS) {
+ if (SignedValue > 0xFFFF || SignedValue < -0xFFFF)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+
+ // Invert the negative immediate because it will feed into a MOVN.
+ if (SignedValue < 0)
+ SignedValue = ~SignedValue;
+ Value = static_cast<uint64_t>(SignedValue);
+ }
+ else if (Value > 0xFFFF) {
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ }
return Value;
+ }
case AArch64::fixup_aarch64_pcrel_branch14:
// Signed 16-bit immediate
if (SignedValue > 32767 || SignedValue < -32768)
@@ -294,8 +364,9 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
return; // Doesn't change encoding.
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
MCContext &Ctx = Asm.getContext();
+ int64_t SignedValue = static_cast<int64_t>(Value);
// Apply any target-specific value adjustments.
- Value = adjustFixupValue(Fixup, Value, Ctx, TheTriple, IsResolved);
+ Value = adjustFixupValue(Fixup, Target, Value, Ctx, TheTriple, IsResolved);
// Shift the value into position.
Value <<= Info.TargetOffset;
@@ -322,6 +393,19 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
+
+ // FIXME: getFixupKindInfo() and getFixupKindNumBytes() could be fixed to
+ // handle this more cleanly. This may affect the output of -show-mc-encoding.
+ AArch64MCExpr::VariantKind RefKind =
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ if (RefKind & AArch64MCExpr::VK_SABS) {
+ // If the immediate is negative, generate MOVN else MOVZ.
+ // (Bit 30 = 0) ==> MOVN, (Bit 30 = 1) ==> MOVZ.
+ if (SignedValue < 0)
+ Data[Offset + 3] &= ~(1 << 6);
+ else
+ Data[Offset + 3] |= (1 << 6);
+ }
}
bool AArch64AsmBackend::mayNeedRelaxation(const MCInst &Inst,
@@ -376,6 +460,14 @@ bool AArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
// to the linker -- a relocation!
if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
return true;
+
+ AArch64MCExpr::VariantKind RefKind =
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind);
+ // LDR GOT relocations need a relocation
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_ldr_pcrel_imm19 &&
+ SymLoc == AArch64MCExpr::VK_GOT)
+ return true;
return false;
}
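To make the new movw fixup resolution above easier to follow, here is a simplified standalone model of the unsigned :abs_gN: path: pick the 16-bit group, then range-check unless the relocation is a no-overflow-check (_NC) variant. This is an illustration, not the backend code itself.

#include <cassert>
#include <cstdint>
#include <optional>

// Select the 16-bit group G0..G3 of an absolute value for a MOVZ/MOVK
// fixup. Without the _NC variant, any bits left above the selected group
// make the fixup out of range, matching the checks above.
static std::optional<uint16_t> selectMovwGroup(uint64_t Value, unsigned Group,
                                               bool NoCheck) {
  uint64_t Shifted = Value >> (16 * Group);
  if (!NoCheck && Shifted > 0xFFFF)
    return std::nullopt; // "fixup value out of range"
  return static_cast<uint16_t>(Shifted & 0xFFFF);
}

int main() {
  assert(*selectMovwGroup(0x12345678ULL, 1, false) == 0x1234);
  assert(!selectMovwGroup(0x12345678ULL, 0, false));          // upper bits left over
  assert(*selectMovwGroup(0x12345678ULL, 0, true) == 0x5678); // :abs_g0_nc:
}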
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index a11e396217af..2ccd7cef8bef 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -138,7 +138,9 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
} else
return ELF::R_AARCH64_PREL64;
case AArch64::fixup_aarch64_pcrel_adr_imm21:
- assert(SymLoc == AArch64MCExpr::VK_NONE && "unexpected ADR relocation");
+ if (SymLoc != AArch64MCExpr::VK_ABS)
+ Ctx.reportError(Fixup.getLoc(),
+ "invalid symbol kind for ADR relocation");
return R_CLS(ADR_PREL_LO21);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC)
@@ -169,6 +171,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
case AArch64::fixup_aarch64_ldr_pcrel_imm19:
if (SymLoc == AArch64MCExpr::VK_GOTTPREL)
return R_CLS(TLSIE_LD_GOTTPREL_PREL19);
+ if (SymLoc == AArch64MCExpr::VK_GOT)
+ return R_CLS(GOT_LD_PREL19);
return R_CLS(LD_PREL_LO19);
case AArch64::fixup_aarch64_pcrel_branch14:
return R_CLS(TSTBR14);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index c0ef8b670286..9a7e34b0aeb1 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -60,16 +60,6 @@ void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) {
OS << "\t.inst\t0x" << Twine::utohexstr(Inst) << "\n";
}
-class AArch64TargetELFStreamer : public AArch64TargetStreamer {
-private:
- AArch64ELFStreamer &getStreamer();
-
- void emitInst(uint32_t Inst) override;
-
-public:
- AArch64TargetELFStreamer(MCStreamer &S) : AArch64TargetStreamer(S) {}
-};
-
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
/// the appropriate points in the object files. These symbols are defined in the
/// AArch64 ELF ABI:
@@ -85,8 +75,6 @@ public:
/// by MachO. Beware!
class AArch64ELFStreamer : public MCELFStreamer {
public:
- friend class AArch64TargetELFStreamer;
-
AArch64ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> Emitter)
@@ -154,6 +142,11 @@ public:
MCELFStreamer::EmitValueImpl(Value, Size, Loc);
}
+ void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
+ SMLoc Loc) override {
+ EmitDataMappingSymbol();
+ MCObjectStreamer::emitFill(NumBytes, FillValue, Loc);
+ }
private:
enum ElfMappingSymbol {
EMS_None,
@@ -192,6 +185,8 @@ private:
} // end anonymous namespace
+namespace llvm {
+
AArch64ELFStreamer &AArch64TargetELFStreamer::getStreamer() {
return static_cast<AArch64ELFStreamer &>(Streamer);
}
@@ -200,8 +195,6 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
getStreamer().emitInst(Inst);
}
-namespace llvm {
-
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
MCInstPrinter *InstPrint,
@@ -221,14 +214,4 @@ MCELFStreamer *createAArch64ELFStreamer(MCContext &Context,
return S;
}
-MCTargetStreamer *
-createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
- const Triple &TT = STI.getTargetTriple();
- if (TT.isOSBinFormatELF())
- return new AArch64TargetELFStreamer(S);
- if (TT.isOSBinFormatCOFF())
- return new AArch64TargetWinCOFFStreamer(S);
- return nullptr;
-}
-
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index ebb49121c1bf..58e4a9c9a9e9 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -115,6 +115,7 @@ AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() {
CommentString = ";";
ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
}
AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() {
@@ -131,4 +132,7 @@ AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() {
CommentString = "//";
ExceptionsType = ExceptionHandling::DwarfCFI;
+ // The default is dwarf, but WinEH can be enabled optionally, which requires
+ // WinEHEncodingType to be set.
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index cd937935ddbf..729486b1020c 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "AArch64MCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/ELF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -62,8 +62,10 @@ StringRef AArch64MCExpr::getVariantKindName() const {
case VK_TLSDESC_LO12: return ":tlsdesc_lo12:";
case VK_ABS_PAGE: return "";
case VK_ABS_PAGE_NC: return ":pg_hi21_nc:";
+ case VK_GOT: return ":got:";
case VK_GOT_PAGE: return ":got:";
case VK_GOT_LO12: return ":got_lo12:";
+ case VK_GOTTPREL: return ":gottprel:";
case VK_GOTTPREL_PAGE: return ":gottprel:";
case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:";
case VK_GOTTPREL_G1: return ":gottprel_g1:";
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 4ceda7e122f4..0f8198ba4e9b 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -16,6 +16,7 @@
#include "AArch64MCAsmInfo.h"
#include "AArch64WinCOFFStreamer.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstrAnalysis.h"
@@ -24,12 +25,14 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define GET_INSTRINFO_MC_HELPERS
#include "AArch64GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
@@ -153,6 +156,31 @@ public:
}
return false;
}
+
+ std::vector<std::pair<uint64_t, uint64_t>>
+ findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA,
+ const Triple &TargetTriple) const override {
+ // Do a lightweight parsing of PLT entries.
+ std::vector<std::pair<uint64_t, uint64_t>> Result;
+ for (uint64_t Byte = 0, End = PltContents.size(); Byte + 7 < End;
+ Byte += 4) {
+ uint32_t Insn = support::endian::read32le(PltContents.data() + Byte);
+ // Check for adrp.
+ if ((Insn & 0x9f000000) != 0x90000000)
+ continue;
+ uint64_t Imm = (((PltSectionVA + Byte) >> 12) << 12) +
+ (((Insn >> 29) & 3) << 12) + (((Insn >> 5) & 0x3ffff) << 14);
+ uint32_t Insn2 = support::endian::read32le(PltContents.data() + Byte + 4);
+ // Check for: ldr Xt, [Xn, #pimm].
+ if (Insn2 >> 22 == 0x3e5) {
+ Imm += ((Insn2 >> 10) & 0xfff) << 3;
+ Result.push_back(std::make_pair(PltSectionVA + Byte, Imm));
+ Byte += 4;
+ }
+ }
+ return Result;
+ }
};
} // end anonymous namespace
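A rough standalone model of the adrp half of the lightweight PLT scan added above: the scan recognizes the opcode with the 0x9f000000 mask and rebuilds the page the instruction materializes, without bothering to sign-extend. Names here are for illustration only.

#include <cassert>
#include <cstdint>

// Given the address of an adrp instruction and its 32-bit encoding, return
// the page it materializes: (PC & ~0xfff) + ((immhi:immlo) << 12), with
// immlo in bits 30:29 and the low 18 bits of immhi starting at bit 5.
static uint64_t adrpTargetPage(uint64_t InsnAddr, uint32_t Insn) {
  uint64_t ImmLo = (Insn >> 29) & 0x3;
  uint64_t ImmHi = (Insn >> 5) & 0x3ffff;
  return ((InsnAddr >> 12) << 12) + (ImmLo << 12) + (ImmHi << 14);
}

int main() {
  // adrp x16, #0 at any address targets that address's own page.
  uint32_t AdrpX16Zero = 0x90000010;
  assert((AdrpX16Zero & 0x9f000000) == 0x90000000); // adrp opcode check
  assert(adrpTargetPage(0x400123, AdrpX16Zero) == 0x400000);
}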
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 63f50778ccdb..0f22f69bd5b0 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -84,6 +84,7 @@ void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
// Defines symbolic names for the AArch64 instructions.
//
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index dee964df2635..a6b8d963bef9 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -13,6 +13,7 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
@@ -52,3 +53,17 @@ void AArch64TargetStreamer::emitInst(uint32_t Inst) {
getStreamer().EmitBytes(StringRef(Buffer, 4));
}
+
+namespace llvm {
+
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new AArch64TargetELFStreamer(S);
+ if (TT.isOSBinFormatCOFF())
+ return new AArch64TargetWinCOFFStreamer(S);
+ return nullptr;
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 51432830f795..73fb9baea3e3 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -12,6 +12,10 @@
#include "llvm/MC/MCStreamer.h"
+namespace {
+class AArch64ELFStreamer;
+}
+
namespace llvm {
class AArch64TargetStreamer : public MCTargetStreamer {
@@ -33,10 +37,75 @@ public:
/// Callback used to implement the .inst directive.
virtual void emitInst(uint32_t Inst);
+ virtual void EmitARM64WinCFIAllocStack(unsigned Size) {}
+ virtual void EmitARM64WinCFISaveFPLR(int Offset) {}
+ virtual void EmitARM64WinCFISaveFPLRX(int Offset) {}
+ virtual void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISetFP() {}
+ virtual void EmitARM64WinCFIAddFP(unsigned Size) {}
+ virtual void EmitARM64WinCFINop() {}
+ virtual void EmitARM64WinCFIPrologEnd() {}
+ virtual void EmitARM64WinCFIEpilogStart() {}
+ virtual void EmitARM64WinCFIEpilogEnd() {}
+
private:
std::unique_ptr<AssemblerConstantPools> ConstantPools;
};
+class AArch64TargetELFStreamer : public AArch64TargetStreamer {
+private:
+ AArch64ELFStreamer &getStreamer();
+
+ void emitInst(uint32_t Inst) override;
+
+public:
+ AArch64TargetELFStreamer(MCStreamer &S) : AArch64TargetStreamer(S) {}
+};
+
+class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer {
+private:
+ // True if we are processing SEH directives in an epilogue.
+ bool InEpilogCFI = false;
+
+ // Symbol of the current epilog for which we are processing SEH directives.
+ MCSymbol *CurrentEpilog = nullptr;
+public:
+ AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S)
+ : AArch64TargetStreamer(S) {}
+
+ // The unwind codes on ARM64 Windows are documented at
+ // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+ void EmitARM64WinCFIAllocStack(unsigned Size) override;
+ void EmitARM64WinCFISaveFPLR(int Offset) override;
+ void EmitARM64WinCFISaveFPLRX(int Offset) override;
+ void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISetFP() override;
+ void EmitARM64WinCFIAddFP(unsigned Size) override;
+ void EmitARM64WinCFINop() override;
+ void EmitARM64WinCFIPrologEnd() override;
+ void EmitARM64WinCFIEpilogStart() override;
+ void EmitARM64WinCFIEpilogEnd() override;
+private:
+ void EmitARM64WinUnwindCode(unsigned UnwindCode, int Reg, int Offset);
+};
+
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index 9871dc553bed..b828ab832e9d 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -11,31 +11,184 @@
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCWin64EH.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
using namespace llvm;
namespace {
class AArch64WinCOFFStreamer : public MCWinCOFFStreamer {
-public:
- friend class AArch64TargetWinCOFFStreamer;
+ Win64EH::ARM64UnwindEmitter EHStreamer;
+public:
AArch64WinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> AB,
std::unique_ptr<MCCodeEmitter> CE,
std::unique_ptr<MCObjectWriter> OW)
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {}
+ void EmitWinEHHandlerData(SMLoc Loc) override;
+ void EmitWindowsUnwindTables() override;
void FinishImpl() override;
};
+void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
+ MCStreamer::EmitWinEHHandlerData(Loc);
+
+ // We have to emit the unwind info now, because this directive
+ // actually switches to the .xdata section!
+ EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo());
+}
+
+void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() {
+ if (!getNumWinFrameInfos())
+ return;
+ EHStreamer.Emit(*this);
+}
+
void AArch64WinCOFFStreamer::FinishImpl() {
EmitFrames(nullptr);
+ EmitWindowsUnwindTables();
MCWinCOFFStreamer::FinishImpl();
}
} // end anonymous namespace
namespace llvm {
+
+// Helper function to common out unwind code setup for those codes that can
+// belong to both prolog and epilog.
+// There are three types of Windows ARM64 SEH codes. They can
+// 1) take no operands: SEH_Nop, SEH_PrologEnd, SEH_EpilogStart, SEH_EpilogEnd
+// 2) take an offset: SEH_StackAlloc, SEH_SaveFPLR, SEH_SaveFPLR_X
+// 3) take a register and an offset/size: all others
+void AArch64TargetWinCOFFStreamer::EmitARM64WinUnwindCode(unsigned UnwindCode,
+ int Reg,
+ int Offset) {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+ MCSymbol *Label = S.EmitCFILabel();
+ auto Inst = WinEH::Instruction(UnwindCode, Label, Reg, Offset);
+ if (InEpilogCFI)
+ CurFrame->EpilogMap[CurrentEpilog].push_back(Inst);
+ else
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAllocStack(unsigned Size) {
+ unsigned Op = Win64EH::UOP_AllocSmall;
+ if (Size >= 16384)
+ Op = Win64EH::UOP_AllocLarge;
+ else if (Size >= 512)
+ Op = Win64EH::UOP_AllocMedium;
+ EmitARM64WinUnwindCode(Op, -1, Size);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLR(int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLR, -1, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLRX(int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLRX, -1, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveReg(unsigned Reg,
+ int Offset) {
+ assert(Offset >= 0 && Offset <= 504 &&
+ "Offset for save reg should be >= 0 && <= 504");
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveReg, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegP(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegP, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegPX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegPX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFReg(unsigned Reg,
+ int Offset) {
+ assert(Offset >= 0 && Offset <= 504 &&
+ "Offset for save reg should be >= 0 && <= 504");
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFReg, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFRegX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFRegX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFRegP(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFRegP, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFRegPX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFRegPX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISetFP() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SetFP, -1, 0);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAddFP(unsigned Offset) {
+ assert(Offset <= 2040 && "UOP_AddFP must have offset <= 2040");
+ EmitARM64WinUnwindCode(Win64EH::UOP_AddFP, -1, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFINop() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_Nop, -1, 0);
+}
+
+// The functions below handle opcodes that can end up in either a prolog or
+// an epilog, but not both.
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIPrologEnd() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ MCSymbol *Label = S.EmitCFILabel();
+ CurFrame->PrologEnd = Label;
+ WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0);
+ auto it = CurFrame->Instructions.begin();
+ CurFrame->Instructions.insert(it, Inst);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogStart() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ InEpilogCFI = true;
+ CurrentEpilog = S.EmitCFILabel();
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogEnd() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ InEpilogCFI = false;
+ MCSymbol *Label = S.EmitCFILabel();
+ WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0);
+ CurFrame->EpilogMap[CurrentEpilog].push_back(Inst);
+ CurrentEpilog = nullptr;
+}
+
MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
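EmitARM64WinCFIAllocStack above picks the unwind opcode purely from the allocation size. A minimal sketch of that threshold test follows, with stand-in names for the Win64EH::UOP_Alloc* opcodes.

#include <cassert>

// Illustrative stand-ins for Win64EH::UOP_Alloc{Small,Medium,Large}.
enum class AllocOp { Small, Medium, Large };

// Sizes below 512 bytes fit the small unwind code, sizes below 16 KiB the
// medium one, and everything else needs the large form.
static AllocOp classifyAllocStack(unsigned Size) {
  if (Size >= 16384)
    return AllocOp::Large;
  if (Size >= 512)
    return AllocOp::Medium;
  return AllocOp::Small;
}

int main() {
  assert(classifyAllocStack(496) == AllocOp::Small);
  assert(classifyAllocStack(4096) == AllocOp::Medium);
  assert(classifyAllocStack(65536) == AllocOp::Large);
}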
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
index c05422163584..ed265a876ab3 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
@@ -17,20 +17,6 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/MCWinCOFFStreamer.h"
-namespace {
-class AArch64WinCOFFStreamer;
-
-class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer {
-private:
- AArch64WinCOFFStreamer &getStreamer();
-
-public:
- AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S)
- : AArch64TargetStreamer(S) {}
-};
-
-} // end anonymous namespace
-
namespace llvm {
MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
diff --git a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7a8dd8bc5aee..23a65b345bad 100644
--- a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1535,7 +1535,7 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
}
-
+
//===----------------------------------------------------------------------===//
// SVE Floating Point Unary Operations - Unpredicated Group
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 23cc21ce2e7c..c88155db7037 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -62,6 +62,13 @@ namespace llvm {
}
namespace llvm {
+ namespace AArch64PRCTX {
+#define GET_PRCTX_IMPL
+#include "AArch64GenSystemOperands.inc"
+ }
+}
+
+namespace llvm {
namespace AArch64PRFM {
#define GET_PRFM_IMPL
#include "AArch64GenSystemOperands.inc"
@@ -104,6 +111,13 @@ namespace llvm {
}
namespace llvm {
+ namespace AArch64BTIHint {
+#define GET_BTI_IMPL
+#include "AArch64GenSystemOperands.inc"
+ }
+}
+
+namespace llvm {
namespace AArch64SysReg {
#define GET_SYSREG_IMPL
#include "AArch64GenSystemOperands.inc"
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 2874c4ab42ea..44c6a6b44895 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -388,6 +388,14 @@ namespace AArch64PSBHint {
#include "AArch64GenSystemOperands.inc"
}
+namespace AArch64BTIHint {
+ struct BTI : SysAlias {
+ using SysAlias::SysAlias;
+ };
+ #define GET_BTI_DECL
+ #include "AArch64GenSystemOperands.inc"
+}
+
namespace AArch64SE {
enum ShiftExtSpecifiers {
Invalid = -1,
@@ -499,6 +507,14 @@ namespace AArch64TLBI {
#include "AArch64GenSystemOperands.inc"
}
+namespace AArch64PRCTX {
+ struct PRCTX : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
+ };
+ #define GET_PRCTX_DECL
+ #include "AArch64GenSystemOperands.inc"
+}
+
namespace AArch64II {
/// Target Operand Flag enum.
enum TOF {
@@ -507,7 +523,7 @@ namespace AArch64II {
MO_NO_FLAG,
- MO_FRAGMENT = 0xf,
+ MO_FRAGMENT = 0x7,
/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
@@ -540,6 +556,11 @@ namespace AArch64II {
/// by-12-bits instruction.
MO_HI12 = 7,
+ /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
+  /// reference is actually to the ".refptr.FOO" symbol. This is used for
+  /// stub symbols on Windows.
+ MO_COFFSTUB = 0x8,
+
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
@@ -560,6 +581,10 @@ namespace AArch64II {
/// to the symbol is for an import stub. This is used for DLL import
/// storage class indication on Windows.
MO_DLLIMPORT = 0x80,
+
+ /// MO_S - Indicates that the bits of the symbol operand represented by
+ /// MO_G0 etc are signed.
+ MO_S = 0x100,
};
} // end namespace AArch64II
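With MO_FRAGMENT narrowed to 0x7 above, bit 3 becomes available for MO_COFFSTUB, and MO_S joins the flag bits. A small sketch of how a target-flag word now decomposes; the enum here copies only the values shown above and is not the full AArch64II list.

#include <cassert>

// Illustrative copies of the operand-flag values discussed above.
enum : unsigned {
  MO_FRAGMENT = 0x7,  // now three bits, freeing bit 3
  MO_HI12 = 7,
  MO_COFFSTUB = 0x8,  // newly claimed bit
  MO_S = 0x100,       // the G0..G3 fragment is signed
};

// Decompose a target flag word: mask the fragment with 0x7 instead of 0xf,
// then test the independent flag bits.
static void splitFlags(unsigned Flags, unsigned &Fragment, bool &CoffStub,
                       bool &Signed) {
  Fragment = Flags & MO_FRAGMENT;
  CoffStub = (Flags & MO_COFFSTUB) != 0;
  Signed = (Flags & MO_S) != 0;
}

int main() {
  unsigned Fragment;
  bool CoffStub, Signed;
  splitFlags(MO_HI12 | MO_S, Fragment, CoffStub, Signed);
  assert(Fragment == MO_HI12 && !CoffStub && Signed);
}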
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
index 2b49c2ea88e1..bb7801c172f6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -37,10 +37,13 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
// SI Passes
+FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
+FunctionPass *createSIFixupVectorISelPass();
+FunctionPass *createSIAddIMGInitPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass();
FunctionPass *createSIWholeQuadModePass();
@@ -57,6 +60,7 @@ FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();
+FunctionPass *createSIModeRegisterPass();
void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
@@ -69,10 +73,18 @@ Pass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
+FunctionPass *createAMDGPUAtomicOptimizerPass();
+void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
+extern char &AMDGPUAtomicOptimizerID;
+
ModulePass *createAMDGPULowerIntrinsicsPass();
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
extern char &AMDGPULowerIntrinsicsID;
+ModulePass *createAMDGPUFixFunctionBitcastsPass();
+void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
+extern char &AMDGPUFixFunctionBitcastsID;
+
FunctionPass *createAMDGPULowerKernelArgumentsPass();
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
extern char &AMDGPULowerKernelArgumentsID;
@@ -84,6 +96,9 @@ extern char &AMDGPULowerKernelAttributesID;
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;
+void initializeGCNDPPCombinePass(PassRegistry &);
+extern char &GCNDPPCombineID;
+
void initializeR600ClauseMergePassPass(PassRegistry &);
extern char &R600ClauseMergePassID;
@@ -114,6 +129,9 @@ extern char &SIFixSGPRCopiesID;
void initializeSIFixVGPRCopiesPass(PassRegistry &);
extern char &SIFixVGPRCopiesID;
+void initializeSIFixupVectorISelPass(PassRegistry &);
+extern char &SIFixupVectorISelID;
+
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
@@ -141,6 +159,9 @@ extern char &AMDGPUSimplifyLibCallsID;
void initializeAMDGPUUseNativeCallsPass(PassRegistry &);
extern char &AMDGPUUseNativeCallsID;
+void initializeSIAddIMGInitPass(PassRegistry &);
+extern char &SIAddIMGInitID;
+
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisID;
@@ -179,6 +200,9 @@ extern char &SIMemoryLegalizerID;
void initializeSIDebuggerInsertNopsPass(PassRegistry&);
extern char &SIDebuggerInsertNopsID;
+void initializeSIModeRegisterPass(PassRegistry&);
+extern char &SIModeRegisterID;
+
void initializeSIInsertWaitcntsPass(PassRegistry&);
extern char &SIInsertWaitcntsID;
@@ -190,6 +214,8 @@ extern char &AMDGPUUnifyDivergentExitNodesID;
ImmutablePass *createAMDGPUAAWrapperPass();
void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
+ImmutablePass *createAMDGPUExternalAAWrapperPass();
+void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
@@ -221,19 +247,18 @@ enum TargetIndex {
/// however on the GPU, each address space points to
/// a separate piece of memory that is unique from other
/// memory locations.
-struct AMDGPUAS {
- // The following address space values depend on the triple environment.
- unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
- unsigned FLAT_ADDRESS; ///< Address space for flat memory.
- unsigned REGION_ADDRESS; ///< Address space for region memory.
-
+namespace AMDGPUAS {
enum : unsigned {
// The maximum value for flat, generic, local, private, constant and region.
MAX_AMDGPU_ADDRESS = 6,
+ FLAT_ADDRESS = 0, ///< Address space for flat memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
+ REGION_ADDRESS = 2, ///< Address space for region memory.
+
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
LOCAL_ADDRESS = 3, ///< Address space for local memory.
+ PRIVATE_ADDRESS = 5, ///< Address space for private memory.
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
@@ -268,14 +293,6 @@ struct AMDGPUAS {
// Some places use this if the address space can't be determined.
UNKNOWN_ADDRESS_SPACE = ~0u,
};
-};
-
-namespace llvm {
-namespace AMDGPU {
-AMDGPUAS getAMDGPUAS(const Module &M);
-AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
-AMDGPUAS getAMDGPUAS(Triple T);
-} // namespace AMDGPU
-} // namespace llvm
+}
#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
index 445b69b35eb1..6a4cfe08e491 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -11,6 +11,10 @@ include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"
+class BoolToList<bit Value> {
+ list<int> ret = !if(Value, [1]<int>, []<int>);
+}
+
//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//
@@ -140,6 +144,12 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts",
"Additional instructions for CI+"
>;
+def FeatureVIInsts : SubtargetFeature<"vi-insts",
+ "VIInsts",
+ "true",
+ "Additional instructions for VI+"
+>;
+
def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
"GFX9Insts",
"true",
@@ -236,6 +246,12 @@ def FeatureDPP : SubtargetFeature<"dpp",
"Support DPP (Data Parallel Primitives) extension"
>;
+def FeatureR128A16 : SubtargetFeature<"r128-a16",
+ "HasR128A16",
+ "true",
+ "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
+>;
+
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
"HasIntClamp",
"true",
@@ -251,16 +267,19 @@ def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
def FeatureDLInsts : SubtargetFeature<"dl-insts",
"HasDLInsts",
"true",
- "Has deep learning instructions"
+ "Has v_fmac_f32 and v_xnor_b32 instructions"
+>;
+
+def FeatureDotInsts : SubtargetFeature<"dot-insts",
+ "HasDotInsts",
+ "true",
+ "Has v_dot* instructions"
>;
-def FeatureD16PreservesUnusedBits : SubtargetFeature<
- "d16-preserves-unused-bits",
- "D16PreservesUnusedBits",
+def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
+ "EnableSRAMECC",
"true",
- "If present, then instructions defined by HasD16LoadStore predicate preserve "
- "unused bits. Otherwise instructions defined by HasD16LoadStore predicate "
- "zero unused bits."
+ "Enable SRAM ECC"
>;
//===------------------------------------------------------------===//
@@ -315,12 +334,6 @@ def FeatureEnableHugePrivateBuffer : SubtargetFeature<
"Enable private/scratch buffer sizes greater than 128 GB"
>;
-def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
- "EnableVGPRSpilling",
- "true",
- "Enable spilling of VGPRs to scratch memory"
->;
-
def FeatureDumpCode : SubtargetFeature <"DumpCode",
"DumpCode",
"true",
@@ -364,6 +377,16 @@ def FeatureEnableDS128 : SubtargetFeature<"enable-ds128",
"Use ds_{read|write}_b128"
>;
+// Sparse texture support requires that all result registers are zeroed when
+// PRTStrictNull is set to true. This feature is turned on for all architectures
+// but is enabled as a feature in case there are situations where PRTStrictNull
+// is disabled by the driver.
+def FeatureEnablePRTStrictNull : SubtargetFeature<"enable-prt-strict-null",
+ "EnablePRTStrictNull",
+ "true",
+ "Enable zeroing of result registers for sparse texture fetches"
+>;
+
// Unless +-flat-for-global is specified, turn on FlatForGlobal for
// all OS-es on VI and newer hardware to avoid assertion failures due
// to missing ADDR64 variants of MUBUF instructions.
@@ -390,6 +413,12 @@ def FeatureCodeObjectV3 : SubtargetFeature <
"Generate code object version 3"
>;
+def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
+ "HasTrigReducedRange",
+ "true",
+ "Requires use of fract on arguments to trig instructions"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -409,36 +438,36 @@ class GCNSubtargetFeatureGeneration <string Value,
def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureGCN,
- FeatureLDSBankCount32, FeatureMovrel]
+ FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange]
>;
def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
- FeatureCIInsts, FeatureMovrel]
+ FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange]
>;
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
+ FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
- FeatureIntClamp
+ FeatureIntClamp, FeatureTrigReducedRange
]
>;
def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
[FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
+ FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
- FeatureAddNoCarryInsts, FeatureScalarAtomics
+ FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
]
>;
@@ -456,34 +485,41 @@ def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0,
[FeatureSouthernIslands,
FeatureFastFMAF32,
HalfRate64Ops,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1,
[FeatureSouthernIslands,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
[FeatureSeaIslands,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
[FeatureSeaIslands,
HalfRate64Ops,
FeatureLDSBankCount32,
- FeatureFastFMAF32]>;
+ FeatureFastFMAF32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
[FeatureSeaIslands,
FeatureLDSBankCount16,
- FeatureFastFMAF32]>;
+ FeatureFastFMAF32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3,
[FeatureSeaIslands,
- FeatureLDSBankCount16]>;
+ FeatureLDSBankCount16,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_4 : SubtargetFeatureISAVersion <7,0,4,
[FeatureSeaIslands,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
[FeatureVolcanicIslands,
@@ -491,49 +527,63 @@ def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
HalfRate64Ops,
FeatureLDSBankCount32,
FeatureXNACK,
- FeatureUnpackedD16VMem]>;
+ FeatureUnpackedD16VMem,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
FeatureSGPRInitBug,
- FeatureUnpackedD16VMem]>;
+ FeatureUnpackedD16VMem,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
- FeatureUnpackedD16VMem]>;
+ FeatureUnpackedD16VMem,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
[FeatureVolcanicIslands,
FeatureLDSBankCount16,
- FeatureXNACK]>;
+ FeatureXNACK,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
- FeatureD16PreservesUnusedBits]>;
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureXNACK,
- FeatureD16PreservesUnusedBits]>;
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4,
[FeatureGFX9,
FeatureLDSBankCount32,
FeatureFmaMixInsts,
- FeatureD16PreservesUnusedBits]>;
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
[FeatureGFX9,
HalfRate64Ops,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
- FeatureDLInsts]>;
+ FeatureDLInsts,
+ FeatureDotInsts,
+ FeatureSRAMECC,
+ FeatureCodeObjectV3]>;
+
+def FeatureISAVersion9_0_9 : SubtargetFeatureISAVersion <9,0,9,
+ [FeatureGFX9,
+ FeatureMadMixInsts,
+ FeatureLDSBankCount32,
+ FeatureXNACK,
+ FeatureCodeObjectV3]>;
//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
@@ -665,8 +715,9 @@ def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
AssemblerPredicate<"!FeatureUnpackedD16VMem">;
-def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">,
- AssemblerPredicate<"FeatureD16PreservesUnusedBits">;
+def D16PreservesUnusedBits :
+ Predicate<"Subtarget->hasD16LoadStore() && !Subtarget->isSRAMECCEnabled()">,
+ AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
@@ -674,10 +725,10 @@ def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
AssemblerPredicate<"FeatureGFX9Insts">;
-def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarryInsts()">,
+def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
AssemblerPredicate<"FeatureAddNoCarryInsts">;
-def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarryInsts()">,
+def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">,
AssemblerPredicate<"!FeatureAddNoCarryInsts">;
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
@@ -697,6 +748,9 @@ def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureDPP">;
+def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
+ AssemblerPredicate<"FeatureR128A16">;
+
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
AssemblerPredicate<"FeatureIntClamp">;
@@ -719,6 +773,9 @@ def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
AssemblerPredicate<"FeatureDLInsts">;
+def HasDotInsts : Predicate<"Subtarget->hasDotInsts()">,
+ AssemblerPredicate<"FeatureDotInsts">;
+
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
@@ -727,7 +784,6 @@ def EnableLateCFGStructurize : Predicate<
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
-include "AMDGPUIntrinsics.td"
include "SIIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPURegisterBanks.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 974fbcb87191..73709ba13643 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -34,34 +34,28 @@ using namespace llvm;
// Register this pass...
char AMDGPUAAWrapperPass::ID = 0;
+char AMDGPUExternalAAWrapper::ID = 0;
INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa",
"AMDGPU Address space based Alias Analysis", false, true)
+INITIALIZE_PASS(AMDGPUExternalAAWrapper, "amdgpu-aa-wrapper",
+ "AMDGPU Address space based Alias Analysis Wrapper", false, true)
+
ImmutablePass *llvm::createAMDGPUAAWrapperPass() {
return new AMDGPUAAWrapperPass();
}
+ImmutablePass *llvm::createAMDGPUExternalAAWrapperPass() {
+ return new AMDGPUExternalAAWrapper();
+}
+
void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
-// Must match the table in getAliasResult.
-AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_)
- : Arch(Arch_), AS(AS_) {
- // These arrarys are indexed by address space value
- // enum elements 0 ... to 6
- static const AliasResult ASAliasRulesPrivIsZero[7][7] = {
- /* Private Global Constant Group Flat Region Constant 32-bit */
- /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
- /* Global */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
- /* Constant */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
- /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias , NoAlias},
- /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
- /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
- /* Constant 32-bit */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias}
- };
- static const AliasResult ASAliasRulesGenIsZero[7][7] = {
+// This array is indexed by address space value, enum elements 0 to 6.
+static const AliasResult ASAliasRules[7][7] = {
/* Flat Global Region Group Constant Private Constant 32-bit */
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
@@ -70,37 +64,15 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Ar
/* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias, NoAlias , MayAlias},
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
/* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
- };
+};
+
+static AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
- if (AS.FLAT_ADDRESS == 0) {
- assert(AS.GLOBAL_ADDRESS == 1 &&
- AS.REGION_ADDRESS == 2 &&
- AS.LOCAL_ADDRESS == 3 &&
- AS.CONSTANT_ADDRESS == 4 &&
- AS.PRIVATE_ADDRESS == 5 &&
- AS.CONSTANT_ADDRESS_32BIT == 6);
- ASAliasRules = &ASAliasRulesGenIsZero;
- } else {
- assert(AS.PRIVATE_ADDRESS == 0 &&
- AS.GLOBAL_ADDRESS == 1 &&
- AS.CONSTANT_ADDRESS == 2 &&
- AS.LOCAL_ADDRESS == 3 &&
- AS.FLAT_ADDRESS == 4 &&
- AS.REGION_ADDRESS == 5 &&
- AS.CONSTANT_ADDRESS_32BIT == 6);
- ASAliasRules = &ASAliasRulesPrivIsZero;
- }
-}
-AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
- unsigned AS2) const {
- if (AS1 > AS.MAX_AMDGPU_ADDRESS || AS2 > AS.MAX_AMDGPU_ADDRESS) {
- if (Arch == Triple::amdgcn)
- report_fatal_error("Pointer address space out of range");
- return AS1 == AS2 ? MayAlias : NoAlias;
- }
+ if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
+ return MayAlias;
- return (*ASAliasRules)[AS1][AS2];
+ return ASAliasRules[AS1][AS2];
}
AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
@@ -108,8 +80,9 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
- AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
- if (Result == NoAlias) return Result;
+ AliasResult Result = getAliasResult(asA, asB);
+ if (Result == NoAlias)
+ return Result;
// Forward the query to the next alias analysis.
return AAResultBase::alias(LocA, LocB);
@@ -118,9 +91,9 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
-
- if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS ||
- Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS_32BIT) {
+ unsigned AS = Base->getType()->getPointerAddressSpace();
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
return true;
}
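
Editor's note: the rewritten alias query above collapses the two per-triple tables into one static 7x7 table in the fixed amdgcn address-space numbering (flat=0, global=1, region=2, group=3, constant=4, private=5, constant-32bit=6), and out-of-range address spaces now answer MayAlias instead of raising a fatal error. The following is a minimal standalone sketch of that lookup shape, assuming the same 0..6 numbering; the rule body is deliberately simplified (flat aliases everything, otherwise only same-space pointers alias) and the names are illustrative, not the in-tree API.

    #include <cstdint>

    enum class Alias : uint8_t { No, May };

    constexpr unsigned kFlatAS = 0;   // flat is address space 0 in this numbering
    constexpr unsigned kMaxAS = 6;    // mirrors AMDGPUAS::MAX_AMDGPU_ADDRESS

    // Simplified stand-in for the 7x7 ASAliasRules table: flat may alias every
    // space, otherwise only pointers in the same address space may alias. The
    // real table is finer grained (e.g. global and constant may alias).
    Alias aliasResult(unsigned AS1, unsigned AS2) {
      // Unknown (out-of-range) address spaces are answered conservatively.
      if (AS1 > kMaxAS || AS2 > kMaxAS)
        return Alias::May;
      if (AS1 == kFlatAS || AS2 == kFlatAS || AS1 == AS2)
        return Alias::May;
      return Alias::No;
    }
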
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index 09ad51d5e42f..d76c9fc48199 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -33,14 +33,12 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
friend AAResultBase<AMDGPUAAResult>;
const DataLayout &DL;
- AMDGPUAS AS;
public:
explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
- DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS, T.getArch()) {}
+ DL(DL) {}
AMDGPUAAResult(AMDGPUAAResult &&Arg)
- : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
- ASAliasRules(Arg.ASAliasRules){}
+ : AAResultBase(std::move(Arg)), DL(Arg.DL) {}
/// Handle invalidation events from the new pass manager.
///
@@ -53,18 +51,6 @@ public:
private:
bool Aliases(const MDNode *A, const MDNode *B) const;
bool PathAliases(const MDNode *A, const MDNode *B) const;
-
- class ASAliasRulesTy {
- public:
- ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_);
-
- AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
-
- private:
- Triple::ArchType Arch;
- AMDGPUAS AS;
- const AliasResult (*ASAliasRules)[7][7];
- } ASAliasRules;
};
/// Analysis pass providing a never-invalidated alias analysis result.
@@ -110,6 +96,19 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
+// Wrapper around ExternalAAWrapperPass so that the default constructor gets the
+// callback.
+class AMDGPUExternalAAWrapper : public ExternalAAWrapperPass {
+public:
+ static char ID;
+
+ AMDGPUExternalAAWrapper() : ExternalAAWrapperPass(
+ [](Pass &P, Function &, AAResults &AAR) {
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ }) {}
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
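
Editor's note: per its comment, AMDGPUExternalAAWrapper exists only so that a default-constructible pass can hand the AA-registration callback to ExternalAAWrapperPass's constructor. A small generic sketch of that pattern follows, under the assumption that the surrounding framework only default-constructs passes; all names here are made up for illustration and are not LLVM API.

    #include <functional>
    #include <iostream>

    // Base type that expects a callback at construction time.
    struct CallbackPassBase {
      using Callback = std::function<void(int &)>;
      explicit CallbackPassBase(Callback C) : CB(std::move(C)) {}
      void run(int &State) { if (CB) CB(State); }
      Callback CB;
    };

    // A framework that only default-constructs passes needs a derived type that
    // bakes the callback in, which is the role AMDGPUExternalAAWrapper plays for
    // ExternalAAWrapperPass.
    struct MyExternalWrapper : CallbackPassBase {
      MyExternalWrapper()
          : CallbackPassBase([](int &State) { State += 1; }) {}
    };

    int main() {
      int State = 0;
      MyExternalWrapper().run(State);
      std::cout << State << "\n"; // prints 1
    }
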
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index d4bbb2c1eb8d..fc65430b745f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -86,8 +86,6 @@ void AMDGPUAlwaysInline::recursivelyVisitUsers(
}
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
- AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);
-
std::vector<GlobalAlias*> AliasesToRemove;
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
@@ -122,7 +120,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
for (GlobalVariable &GV : M.globals()) {
// TODO: Region address
unsigned AS = GV.getType()->getAddressSpace();
- if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
+ if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
continue;
recursivelyVisitUsers(GV, FuncsToAlwaysInline);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 1a70833a4472..896ac9c87779 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -46,7 +46,6 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
const TargetMachine *TM = nullptr;
- AMDGPUAS AS;
bool addFeatureAttributes(Function &F);
@@ -67,11 +66,10 @@ public:
CallGraphSCCPass::getAnalysisUsage(AU);
}
- static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
+ static bool visitConstantExpr(const ConstantExpr *CE);
static bool visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
- AMDGPUAS AS);
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};
} // end anonymous namespace
@@ -85,20 +83,18 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
// The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
- return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
+static bool castRequiresQueuePtr(unsigned SrcAS) {
+ return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
- const AMDGPUAS &AS) {
- return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
+static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
+ return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}
-bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
- AMDGPUAS AS) {
+bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
- return castRequiresQueuePtr(SrcAS, AS);
+ return castRequiresQueuePtr(SrcAS);
}
return false;
@@ -106,8 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
- AMDGPUAS AS) {
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
if (!ConstantExprVisited.insert(EntryC).second)
return false;
@@ -120,7 +115,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
// Check this constant expression.
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (visitConstantExpr(CE, AS))
+ if (visitConstantExpr(CE))
return true;
}
@@ -262,7 +257,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
continue;
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (castRequiresQueuePtr(ASC, AS)) {
+ if (castRequiresQueuePtr(ASC)) {
NeedQueuePtr = true;
continue;
}
@@ -273,7 +268,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
if (!OpC)
continue;
- if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
+ if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
NeedQueuePtr = true;
break;
}
@@ -318,7 +313,6 @@ bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
if (!TPC)
report_fatal_error("TargetMachine is required");
- AS = AMDGPU::getAMDGPUAS(CG.getModule());
TM = &TPC->getTM<TargetMachine>();
return false;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index ed5370826647..f88e3b0dac86 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -16,7 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/IRBuilder.h"
@@ -32,12 +32,11 @@ namespace {
class AMDGPUAnnotateUniformValues : public FunctionPass,
public InstVisitor<AMDGPUAnnotateUniformValues> {
- DivergenceAnalysis *DA;
+ LegacyDivergenceAnalysis *DA;
MemoryDependenceResults *MDR;
LoopInfo *LI;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
bool isKernelFunc;
- AMDGPUAS AMDGPUASI;
public:
static char ID;
@@ -49,7 +48,7 @@ public:
return "AMDGPU Annotate Uniform Values";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.addRequired<MemoryDependenceWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
@@ -64,7 +63,7 @@ public:
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
@@ -118,14 +117,8 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
}
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
- if (I.isUnconditional())
- return;
-
- Value *Cond = I.getCondition();
- if (!DA->isUniform(Cond))
- return;
-
- setUniformMetadata(I.getParent()->getTerminator());
+ if (DA->isUniform(&I))
+ setUniformMetadata(I.getParent()->getTerminator());
}
void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
@@ -133,7 +126,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
if (!DA->isUniform(Ptr))
return;
auto isGlobalLoad = [&](LoadInst &Load)->bool {
- return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+ return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
};
// We're tracking up to the Function boundaries
// We cannot go beyond because of FunctionPass restrictions
@@ -168,7 +161,6 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
}
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(M);
return false;
}
@@ -176,7 +168,7 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index e62e5d52ad74..2ded7cdb6489 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -40,11 +40,13 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::HSAMD;
// TODO: This should get the default rounding mode from the kernel. We just set
// the default here, but this could change if the OpenCL rounding mode pragmas
@@ -98,8 +100,11 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {
- AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
- }
+ if (IsaInfo::hasCodeObjectV3(getSTI()))
+ HSAMetadataStream.reset(new MetadataStreamerV3());
+ else
+ HSAMetadataStream.reset(new MetadataStreamerV2());
+}
StringRef AMDGPUAsmPrinter::getPassName() const {
return "AMDGPU Assembly Printer";
@@ -116,62 +121,70 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
+ if (IsaInfo::hasCodeObjectV3(getSTI())) {
+ std::string ExpectedTarget;
+ raw_string_ostream ExpectedTargetOS(ExpectedTarget);
+ IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS);
+
+ getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
+ }
if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
- HSAMetadataStream.begin(M);
+ HSAMetadataStream->begin(M);
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
readPALMetadata(M);
+ if (IsaInfo::hasCodeObjectV3(getSTI()))
+ return;
+
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
- IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
+ IsaVersion Version = getIsaVersion(getSTI()->getCPU());
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
- ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
+ Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
- // TODO: Add metadata to code object v3.
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
-
// Following code requires TargetStreamer to be present.
if (!getTargetStreamer())
return;
- // Emit ISA Version (NT_AMD_AMDGPU_ISA).
- std::string ISAVersionString;
- raw_string_ostream ISAVersionStream(ISAVersionString);
- IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
- getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
+ if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+ // Emit ISA Version (NT_AMD_AMDGPU_ISA).
+ std::string ISAVersionString;
+ raw_string_ostream ISAVersionStream(ISAVersionString);
+ IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
+ getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
+ }
// Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
- HSAMetadataStream.end();
- getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata());
+ HSAMetadataStream->end();
+ bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
+ (void)Success;
+ assert(Success && "Malformed HSA Metadata");
}
- // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
- if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
- // Copy the PAL metadata from the map where we collected it into a vector,
- // then write it as a .note.
- PALMD::Metadata PALMetadataVector;
- for (auto i : PALMetadataMap) {
- PALMetadataVector.push_back(i.first);
- PALMetadataVector.push_back(i.second);
+ if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+ // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
+ if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
+ // Copy the PAL metadata from the map where we collected it into a vector,
+ // then write it as a .note.
+ PALMD::Metadata PALMetadataVector;
+ for (auto i : PALMetadataMap) {
+ PALMetadataVector.push_back(i.first);
+ PALMetadataVector.push_back(i.second);
+ }
+ getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
}
- getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
}
}
@@ -193,13 +206,10 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
if (!MFI.isEntryFunction())
return;
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
const Function &F = MF->getFunction();
- if (STM.isAmdCodeObjectV2(F) &&
+ if (!STM.hasCodeObjectV3() && STM.isAmdHsaOrMesa(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
amd_kernel_code_t KernelCode;
@@ -207,10 +217,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
}
- if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
- return;
-
- HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
+ if (STM.isAmdHsaOS())
+ HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
}
void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
@@ -241,7 +249,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
*getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
+ IsaInfo::getNumExtraSGPRs(getSTI(),
CurrentProgramInfo.VCCUsed,
CurrentProgramInfo.FlatUsed),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
@@ -259,7 +267,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
- if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
+ if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
SmallString<128> SymbolName;
getNameWithPrefix(SymbolName, &MF->getFunction()),
getTargetStreamer()->EmitAMDGPUSymbolType(
@@ -562,7 +570,7 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
const GCNSubtarget &ST) const {
- return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
+ return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
UsesVCC, UsesFlatScratch);
}
@@ -759,7 +767,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
// 48 SGPRs - vcc, - flat_scr, -xnack
int MaxSGPRGuess =
- 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
+ 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
@@ -824,7 +832,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
// unified.
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
- STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+ getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
@@ -906,9 +914,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
- STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
+ &STM, ProgInfo.NumSGPRsForWavesPerEU);
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
- STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
+ &STM, ProgInfo.NumVGPRsForWavesPerEU);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
@@ -1003,7 +1011,6 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &CurrentProgramInfo) {
- const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
@@ -1024,10 +1031,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(RsrcReg, 4);
OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
- if (STM.isVGPRSpillingEnabled(MF.getFunction())) {
- OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
- OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
- }
+ OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
+ OutStreamer->EmitIntValue(
+ S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
}
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
@@ -1138,7 +1144,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
Out.compute_pgm_resource_registers =
CurrentProgramInfo.ComputePGMRSrc1 |
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 22982d912c70..167ac4b21e1e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -56,7 +56,7 @@ private:
SIProgramInfo CurrentProgramInfo;
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
- AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream;
+ std::unique_ptr<AMDGPU::HSAMD::MetadataStreamer> HSAMetadataStream;
std::map<uint32_t, uint32_t> PALMetadataMap;
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
@@ -143,7 +143,6 @@ public:
protected:
mutable std::vector<std::string> DisasmLines, HexLines;
mutable size_t DisasmLineMaxLen;
- AMDGPUAS AMDGPUASI;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
new file mode 100644
index 000000000000..644e4fd558ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -0,0 +1,458 @@
+//===-- AMDGPUAtomicOptimizer.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass optimizes atomic operations by using a single lane of a wavefront
+/// to perform the atomic operation, thus reducing contention on that memory
+/// location.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "amdgpu-atomic-optimizer"
+
+using namespace llvm;
+
+namespace {
+
+enum DPP_CTRL {
+ DPP_ROW_SR1 = 0x111,
+ DPP_ROW_SR2 = 0x112,
+ DPP_ROW_SR4 = 0x114,
+ DPP_ROW_SR8 = 0x118,
+ DPP_WF_SR1 = 0x138,
+ DPP_ROW_BCAST15 = 0x142,
+ DPP_ROW_BCAST31 = 0x143
+};
+
+struct ReplacementInfo {
+ Instruction *I;
+ Instruction::BinaryOps Op;
+ unsigned ValIdx;
+ bool ValDivergent;
+};
+
+class AMDGPUAtomicOptimizer : public FunctionPass,
+ public InstVisitor<AMDGPUAtomicOptimizer> {
+private:
+ SmallVector<ReplacementInfo, 8> ToReplace;
+ const LegacyDivergenceAnalysis *DA;
+ const DataLayout *DL;
+ DominatorTree *DT;
+ bool HasDPP;
+ bool IsPixelShader;
+
+ void optimizeAtomic(Instruction &I, Instruction::BinaryOps Op,
+ unsigned ValIdx, bool ValDivergent) const;
+
+ void setConvergent(CallInst *const CI) const;
+
+public:
+ static char ID;
+
+ AMDGPUAtomicOptimizer() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<TargetPassConfig>();
+ }
+
+ void visitAtomicRMWInst(AtomicRMWInst &I);
+ void visitIntrinsicInst(IntrinsicInst &I);
+};
+
+} // namespace
+
+char AMDGPUAtomicOptimizer::ID = 0;
+
+char &llvm::AMDGPUAtomicOptimizerID = AMDGPUAtomicOptimizer::ID;
+
+bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) {
+ if (skipFunction(F)) {
+ return false;
+ }
+
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ DL = &F.getParent()->getDataLayout();
+ DominatorTreeWrapperPass *const DTW =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTW ? &DTW->getDomTree() : nullptr;
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const TargetMachine &TM = TPC.getTM<TargetMachine>();
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ HasDPP = ST.hasDPP();
+ IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS;
+
+ visit(F);
+
+ const bool Changed = !ToReplace.empty();
+
+ for (ReplacementInfo &Info : ToReplace) {
+ optimizeAtomic(*Info.I, Info.Op, Info.ValIdx, Info.ValDivergent);
+ }
+
+ ToReplace.clear();
+
+ return Changed;
+}
+
+void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
+ // Early exit for unhandled address space atomic instructions.
+ switch (I.getPointerAddressSpace()) {
+ default:
+ return;
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::LOCAL_ADDRESS:
+ break;
+ }
+
+ Instruction::BinaryOps Op;
+
+ switch (I.getOperation()) {
+ default:
+ return;
+ case AtomicRMWInst::Add:
+ Op = Instruction::Add;
+ break;
+ case AtomicRMWInst::Sub:
+ Op = Instruction::Sub;
+ break;
+ }
+
+ const unsigned PtrIdx = 0;
+ const unsigned ValIdx = 1;
+
+ // If the pointer operand is divergent, then each lane is doing an atomic
+ // operation on a different address, and we cannot optimize that.
+ if (DA->isDivergent(I.getOperand(PtrIdx))) {
+ return;
+ }
+
+ const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
+
+ // If the value operand is divergent, each lane is contributing a different
+ // value to the atomic calculation. We can only optimize divergent values if
+ // we have DPP available on our subtarget, and the atomic operation is 32
+ // bits.
+ if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) {
+ return;
+ }
+
+ // If we get here, we can optimize the atomic using a single wavefront-wide
+ // atomic operation to do the calculation for the entire wavefront, so
+ // remember the instruction so we can come back to it.
+ const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent};
+
+ ToReplace.push_back(Info);
+}
+
+void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
+ Instruction::BinaryOps Op;
+
+ switch (I.getIntrinsicID()) {
+ default:
+ return;
+ case Intrinsic::amdgcn_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ Op = Instruction::Add;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ Op = Instruction::Sub;
+ break;
+ }
+
+ const unsigned ValIdx = 0;
+
+ const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
+
+ // If the value operand is divergent, each lane is contributing a different
+ // value to the atomic calculation. We can only optimize divergent values if
+ // we have DPP available on our subtarget, and the atomic operation is 32
+ // bits.
+ if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) {
+ return;
+ }
+
+ // If any of the other arguments to the intrinsic are divergent, we can't
+ // optimize the operation.
+ for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) {
+ if (DA->isDivergent(I.getOperand(Idx))) {
+ return;
+ }
+ }
+
+ // If we get here, we can optimize the atomic using a single wavefront-wide
+ // atomic operation to do the calculation for the entire wavefront, so
+ // remember the instruction so we can come back to it.
+ const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent};
+
+ ToReplace.push_back(Info);
+}
+
+void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
+ Instruction::BinaryOps Op,
+ unsigned ValIdx,
+ bool ValDivergent) const {
+ LLVMContext &Context = I.getContext();
+
+ // Start building just before the instruction.
+ IRBuilder<> B(&I);
+
+ // If we are in a pixel shader, because of how we have to mask out helper
+ // lane invocations, we need to record the entry and exit BB's.
+ BasicBlock *PixelEntryBB = nullptr;
+ BasicBlock *PixelExitBB = nullptr;
+
+ // If we're optimizing an atomic within a pixel shader, we need to wrap the
+ // entire atomic operation in a helper-lane check. We do not want any helper
+ // lanes that are around only for the purposes of derivatives to take part
+ // in any cross-lane communication, and we use a branch on whether the lane is
+ // live to do this.
+ if (IsPixelShader) {
+ // Record I's original position as the entry block.
+ PixelEntryBB = I.getParent();
+
+ Value *const Cond = B.CreateIntrinsic(Intrinsic::amdgcn_ps_live, {}, {});
+ Instruction *const NonHelperTerminator =
+ SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, DT, nullptr);
+
+ // Record I's new position as the exit block.
+ PixelExitBB = I.getParent();
+
+ I.moveBefore(NonHelperTerminator);
+ B.SetInsertPoint(&I);
+ }
+
+ Type *const Ty = I.getType();
+ const unsigned TyBitWidth = DL->getTypeSizeInBits(Ty);
+ Type *const VecTy = VectorType::get(B.getInt32Ty(), 2);
+
+ // This is the value in the atomic operation we need to combine in order to
+ // reduce the number of atomic operations.
+ Value *const V = I.getOperand(ValIdx);
+
+ // We need to know how many lanes are active within the wavefront, and we do
+ // this by getting the exec register, which tells us all the lanes that are
+ // active.
+ MDNode *const RegName =
+ llvm::MDNode::get(Context, llvm::MDString::get(Context, "exec"));
+ Value *const Metadata = llvm::MetadataAsValue::get(Context, RegName);
+ CallInst *const Exec =
+ B.CreateIntrinsic(Intrinsic::read_register, {B.getInt64Ty()}, {Metadata});
+ setConvergent(Exec);
+
+ // We need to know how many lanes are active within the wavefront that are
+ // below us. If we counted each lane linearly starting from 0, a lane is
+ // below us only if its associated index was less than ours. We do this by
+ // using the mbcnt intrinsic.
+ Value *const BitCast = B.CreateBitCast(Exec, VecTy);
+ Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0));
+ Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
+ CallInst *const PartialMbcnt = B.CreateIntrinsic(
+ Intrinsic::amdgcn_mbcnt_lo, {}, {ExtractLo, B.getInt32(0)});
+ CallInst *const Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {},
+ {ExtractHi, PartialMbcnt});
+
+ Value *const MbcntCast = B.CreateIntCast(Mbcnt, Ty, false);
+
+ Value *LaneOffset = nullptr;
+ Value *NewV = nullptr;
+
+ // If we have a divergent value in each lane, we need to combine the value
+ // using DPP.
+ if (ValDivergent) {
+ // First we need to set all inactive invocations to 0, so that they can
+ // correctly contribute to the final result.
+ CallInst *const SetInactive = B.CreateIntrinsic(
+ Intrinsic::amdgcn_set_inactive, Ty, {V, B.getIntN(TyBitWidth, 0)});
+ setConvergent(SetInactive);
+ NewV = SetInactive;
+
+ const unsigned Iters = 6;
+ const unsigned DPPCtrl[Iters] = {DPP_ROW_SR1, DPP_ROW_SR2,
+ DPP_ROW_SR4, DPP_ROW_SR8,
+ DPP_ROW_BCAST15, DPP_ROW_BCAST31};
+ const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xa, 0xc};
+
+ // This loop performs an inclusive scan across the wavefront, with all lanes
+ // active (by using the WWM intrinsic).
+ for (unsigned Idx = 0; Idx < Iters; Idx++) {
+ CallInst *const DPP = B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, Ty,
+ {NewV, B.getInt32(DPPCtrl[Idx]),
+ B.getInt32(RowMask[Idx]),
+ B.getInt32(0xf), B.getFalse()});
+ setConvergent(DPP);
+ Value *const WWM = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, DPP);
+
+ NewV = B.CreateBinOp(Op, NewV, WWM);
+ NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
+ }
+
+ // NewV has returned the inclusive scan of V, but for the lane offset we
+ // require an exclusive scan. We do this by shifting the values from the
+ // entire wavefront right by 1, and by setting the bound_ctrl (last argument
+ // to the intrinsic below) to true, we can guarantee that 0 will be shifted
+ // into the 0'th invocation.
+ CallInst *const DPP =
+ B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, {Ty},
+ {NewV, B.getInt32(DPP_WF_SR1), B.getInt32(0xf),
+ B.getInt32(0xf), B.getTrue()});
+ setConvergent(DPP);
+ LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, DPP);
+
+ // Read the value from the last lane, which has accumulated the values of
+ // each active lane in the wavefront. This will be the value we provide to
+ // the atomic operation.
+ if (TyBitWidth == 64) {
+ Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty());
+ Value *const ExtractHi =
+ B.CreateTrunc(B.CreateLShr(NewV, B.getInt64(32)), B.getInt32Ty());
+ CallInst *const ReadLaneLo = B.CreateIntrinsic(
+ Intrinsic::amdgcn_readlane, {}, {ExtractLo, B.getInt32(63)});
+ setConvergent(ReadLaneLo);
+ CallInst *const ReadLaneHi = B.CreateIntrinsic(
+ Intrinsic::amdgcn_readlane, {}, {ExtractHi, B.getInt32(63)});
+ setConvergent(ReadLaneHi);
+ Value *const PartialInsert = B.CreateInsertElement(
+ UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0));
+ Value *const Insert =
+ B.CreateInsertElement(PartialInsert, ReadLaneHi, B.getInt32(1));
+ NewV = B.CreateBitCast(Insert, Ty);
+ } else if (TyBitWidth == 32) {
+ CallInst *const ReadLane = B.CreateIntrinsic(Intrinsic::amdgcn_readlane,
+ {}, {NewV, B.getInt32(63)});
+ setConvergent(ReadLane);
+ NewV = ReadLane;
+ } else {
+ llvm_unreachable("Unhandled atomic bit width");
+ }
+ } else {
+ // Get the total number of active lanes we have by using popcount.
+ Instruction *const Ctpop = B.CreateUnaryIntrinsic(Intrinsic::ctpop, Exec);
+ Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false);
+
+ // Calculate the new value we will be contributing to the atomic operation
+ // for the entire wavefront.
+ NewV = B.CreateMul(V, CtpopCast);
+ LaneOffset = B.CreateMul(V, MbcntCast);
+ }
+
+ // We only want a single lane to enter our new control flow, and we do this
+ // by checking if there are any active lanes below us. Only one lane will
+ // have 0 active lanes below us, so that will be the only one to progress.
+ Value *const Cond = B.CreateICmpEQ(MbcntCast, B.getIntN(TyBitWidth, 0));
+
+ // Store I's original basic block before we split the block.
+ BasicBlock *const EntryBB = I.getParent();
+
+ // We need to introduce some new control flow to force a single lane to be
+ // active. We do this by splitting I's basic block at I, and introducing the
+ // new block such that:
+ // entry --> single_lane -\
+ // \------------------> exit
+ Instruction *const SingleLaneTerminator =
+ SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, DT, nullptr);
+
+ // Move the IR builder into single_lane next.
+ B.SetInsertPoint(SingleLaneTerminator);
+
+ // Clone the original atomic operation into single lane, replacing the
+ // original value with our newly created one.
+ Instruction *const NewI = I.clone();
+ B.Insert(NewI);
+ NewI->setOperand(ValIdx, NewV);
+
+ // Move the IR builder into exit next, and start inserting just before the
+ // original instruction.
+ B.SetInsertPoint(&I);
+
+ // Create a PHI node to get our new atomic result into the exit block.
+ PHINode *const PHI = B.CreatePHI(Ty, 2);
+ PHI->addIncoming(UndefValue::get(Ty), EntryBB);
+ PHI->addIncoming(NewI, SingleLaneTerminator->getParent());
+
+ // We need to broadcast the value from the lowest active lane (the first
+ // lane) to all other lanes in the wavefront. We use an intrinsic for this,
+ // but have to handle 64-bit broadcasts with two calls to this intrinsic.
+ Value *BroadcastI = nullptr;
+
+ if (TyBitWidth == 64) {
+ Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty());
+ Value *const ExtractHi =
+ B.CreateTrunc(B.CreateLShr(PHI, B.getInt64(32)), B.getInt32Ty());
+ CallInst *const ReadFirstLaneLo =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
+ setConvergent(ReadFirstLaneLo);
+ CallInst *const ReadFirstLaneHi =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
+ setConvergent(ReadFirstLaneHi);
+ Value *const PartialInsert = B.CreateInsertElement(
+ UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
+ Value *const Insert =
+ B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
+ BroadcastI = B.CreateBitCast(Insert, Ty);
+ } else if (TyBitWidth == 32) {
+ CallInst *const ReadFirstLane =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
+ setConvergent(ReadFirstLane);
+ BroadcastI = ReadFirstLane;
+ } else {
+ llvm_unreachable("Unhandled atomic bit width");
+ }
+
+ // Now that we have the result of our single atomic operation, we need to
+ // get our individual lane's slice of the result. We combine the lane offset
+ // we calculated earlier with the atomic result value broadcast from the
+ // first lane to recover this lane's portion of the atomic result.
+ Value *const Result = B.CreateBinOp(Op, BroadcastI, LaneOffset);
+
+ if (IsPixelShader) {
+ // Need a final PHI to reconverge to above the helper lane branch mask.
+ B.SetInsertPoint(PixelExitBB->getFirstNonPHI());
+
+ PHINode *const PHI = B.CreatePHI(Ty, 2);
+ PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB);
+ PHI->addIncoming(Result, I.getParent());
+ I.replaceAllUsesWith(PHI);
+ } else {
+ // Replace the original atomic instruction with the new one.
+ I.replaceAllUsesWith(Result);
+ }
+
+ // And delete the original.
+ I.eraseFromParent();
+}
+
+void AMDGPUAtomicOptimizer::setConvergent(CallInst *const CI) const {
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
+}
+
+INITIALIZE_PASS_BEGIN(AMDGPUAtomicOptimizer, DEBUG_TYPE,
+ "AMDGPU atomic optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AMDGPUAtomicOptimizer, DEBUG_TYPE,
+ "AMDGPU atomic optimizations", false, false)
+
+FunctionPass *llvm::createAMDGPUAtomicOptimizerPass() {
+ return new AMDGPUAtomicOptimizer();
+}
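
Editor's note: the uniform-value path of the optimizer above reduces N identical atomic adds to one: a single elected lane adds V * popcount(exec), and every lane then reconstructs the value it would have observed as broadcast_old + V * mbcnt(active lanes below it). The host-side sketch below simulates that arithmetic for a 64-lane wavefront represented as a bitmask; it illustrates the lane-offset math (and one admissible serialization of the original atomics), not the GPU intrinsics, and all names are illustrative.

    #include <cstdint>
    #include <vector>

    // Count set bits (stand-in for llvm.ctpop on the exec mask).
    static unsigned popcount64(uint64_t X) {
      unsigned N = 0;
      for (; X; X &= X - 1) ++N;
      return N;
    }

    // Simulate the uniform-value case: every active lane atomically adds the
    // same V. One lane performs Old = fetch_add(V * popcount(exec)); each lane's
    // own result is Old + V * (number of active lanes strictly below it), which
    // is what the mbcnt_lo/mbcnt_hi pair computes on the GPU.
    static std::vector<uint64_t> simulate(uint64_t &Memory, uint64_t Exec,
                                          uint64_t V) {
      std::vector<uint64_t> PerLane(64, 0);
      uint64_t Old = Memory;
      Memory += V * popcount64(Exec);                // the one wavefront-wide atomic
      for (unsigned Lane = 0; Lane < 64; ++Lane) {
        if (!(Exec & (1ull << Lane)))
          continue;
        uint64_t Below = Exec & ((1ull << Lane) - 1);
        PerLane[Lane] = Old + V * popcount64(Below); // LaneOffset = V * mbcnt
      }
      return PerLane;
    }
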
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 18c7df0d94f2..daef37f9c21f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -28,11 +28,12 @@
using namespace llvm;
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
- : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
+ : CallLowering(&TLI) {
}
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
// FIXME: Add support for non-void returns.
if (Val)
return false;
@@ -50,7 +51,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
- PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
LLT PtrType = getLLTForType(*PtrTy, DL);
unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
unsigned KernArgSegmentPtr =
@@ -72,7 +73,7 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
- PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
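
Editor's note: lowerParameterPtr above forms a pointer into the constant-address-space kernarg segment at a fixed byte offset, and lowerParameter then loads the argument through it. The tiny host-side sketch below mirrors that base-plus-offset fetch; the layout and names are assumed for illustration only.

    #include <cstdint>
    #include <cstring>

    // Fetch a kernel argument of type T that the ABI placed ByteOffset bytes
    // into the read-only kernarg segment: form base + offset, then do a typed
    // load, which is conceptually what lowerParameterPtr/lowerParameter emit.
    template <typename T>
    T loadKernArg(const uint8_t *KernArgSegment, uint64_t ByteOffset) {
      T Value;
      std::memcpy(&Value, KernArgSegment + ByteOffset, sizeof(T));
      return Value;
    }
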
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index f51cb6abbf65..ed859716218e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -23,8 +23,6 @@ namespace llvm {
class AMDGPUTargetLowering;
class AMDGPUCallLowering: public CallLowering {
- AMDGPUAS AMDGPUASI;
-
unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
uint64_t Offset) const;
@@ -35,8 +33,8 @@ class AMDGPUCallLowering: public CallLowering {
public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 68bc7fdd9961..367f120b5fa6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -19,7 +19,7 @@ class CCIfExtend<CCAction A>
// Calling convention for SI
def CC_SI : CallingConv<[
- CCIfInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
+ CCIfInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -33,7 +33,7 @@ def CC_SI : CallingConv<[
CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
- CCIfNotInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
+ CCIfNotInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -64,7 +64,7 @@ def RetCC_SI_Shader : CallingConv<[
]>>,
// 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
- CCIfType<[f32, f16] , CCAssignToReg<[
+ CCIfType<[f32, f16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 5713b7b7f9a8..4dc1e67c573d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -18,7 +18,7 @@
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Passes.h"
@@ -60,10 +60,9 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNSubtarget *ST = nullptr;
AssumptionCache *AC = nullptr;
- DivergenceAnalysis *DA = nullptr;
+ LegacyDivergenceAnalysis *DA = nullptr;
Module *Mod = nullptr;
bool HasUnsafeFPMath = false;
- AMDGPUAS AMDGPUASI;
/// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
/// binary operation \p V.
@@ -177,7 +176,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.setPreservesAll();
}
};
@@ -559,7 +558,7 @@ Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
Value *FQM = Builder.CreateFMul(FA, RCP);
// fq = trunc(fqm);
- CallInst* FQ = Builder.CreateIntrinsic(Intrinsic::trunc, { FQM });
+ CallInst *FQ = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, FQM);
FQ->copyFastMathFlags(Builder.getFastMathFlags());
// float fqneg = -fq;
@@ -567,17 +566,17 @@ Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
// float fr = mad(fqneg, fb, fa);
Value *FR = Builder.CreateIntrinsic(Intrinsic::amdgcn_fmad_ftz,
- { FQNeg, FB, FA }, FQ);
+ {FQNeg->getType()}, {FQNeg, FB, FA}, FQ);
// int iq = (int)fq;
Value *IQ = IsSigned ? Builder.CreateFPToSI(FQ, I32Ty)
: Builder.CreateFPToUI(FQ, I32Ty);
// fr = fabs(fr);
- FR = Builder.CreateIntrinsic(Intrinsic::fabs, { FR }, FQ);
+ FR = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FR, FQ);
// fb = fabs(fb);
- FB = Builder.CreateIntrinsic(Intrinsic::fabs, { FB }, FQ);
+ FB = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FB, FQ);
// int cv = fr >= fb;
Value *CV = Builder.CreateFCmpOGE(FR, FB);
@@ -799,8 +798,8 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
if (!WidenLoads)
return false;
- if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ if ((I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
canWidenScalarExtLoad(I)) {
IRBuilder<> Builder(&I);
Builder.SetCurrentDebugLocation(I.getDebugLoc());
@@ -898,9 +897,8 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
ST = &TM.getSubtarget<GCNSubtarget>(F);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
HasUnsafeFPMath = hasUnsafeFPMath(F);
- AMDGPUASI = TM.getAMDGPUAS();
bool MadeChange = false;
@@ -918,7 +916,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
false, false)
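
Editor's note: the expandDivRem24 hunks above only swap the intrinsic-building helpers, but the inline comments spell out the underlying sequence: convert to float, multiply by the reciprocal, truncate to get a quotient estimate, recover the remainder with an fma, and bump the quotient when the remainder reaches the divisor. The scalar sketch below follows those comments for the unsigned 24-bit case, assuming both operands fit in 24 bits and the divisor is nonzero; it models the math, not the emitted IR.

    #include <cmath>
    #include <cstdint>

    // Unsigned 24-bit divide via float reciprocal, per the commented steps:
    //   fq = trunc(fa * rcp(fb)); fr = fma(-fq, fb, fa);
    //   iq = (uint)fq, then iq += 1 if |fr| >= |fb|.
    static uint32_t udiv24(uint32_t A, uint32_t B) {
      float FA = static_cast<float>(A);
      float FB = static_cast<float>(B);          // B must be nonzero
      float RCP = 1.0f / FB;
      float FQ = std::trunc(FA * RCP);           // quotient estimate
      float FR = std::fma(-FQ, FB, FA);          // remainder estimate
      uint32_t IQ = static_cast<uint32_t>(FQ);
      if (std::fabs(FR) >= std::fabs(FB))        // correction step
        ++IQ;
      return IQ;                                 // remainder would be A - IQ * B
    }
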
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
new file mode 100644
index 000000000000..6e2a981d3396
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
@@ -0,0 +1,63 @@
+//===-- AMDGPUFixFunctionBitcasts.cpp - Fix function bitcasts -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Promote indirect (bitcast) calls to direct calls when they are statically
+/// known to be direct. Required when InstCombine is not run (e.g. at OptNone)
+/// because AMDGPU does not support indirect calls.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-fix-function-bitcasts"
+
+namespace {
+class AMDGPUFixFunctionBitcasts final
+ : public ModulePass,
+ public InstVisitor<AMDGPUFixFunctionBitcasts> {
+
+ bool runOnModule(Module &M) override;
+
+ bool Modified;
+
+public:
+ void visitCallSite(CallSite CS) {
+ if (CS.getCalledFunction())
+ return;
+ auto Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (Callee && isLegalToPromote(CS, Callee)) {
+ promoteCall(CS, Callee);
+ Modified = true;
+ }
+ }
+
+ static char ID;
+ AMDGPUFixFunctionBitcasts() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char AMDGPUFixFunctionBitcasts::ID = 0;
+char &llvm::AMDGPUFixFunctionBitcastsID = AMDGPUFixFunctionBitcasts::ID;
+INITIALIZE_PASS(AMDGPUFixFunctionBitcasts, DEBUG_TYPE,
+ "Fix function bitcasts for AMDGPU", false, false)
+
+ModulePass *llvm::createAMDGPUFixFunctionBitcastsPass() {
+ return new AMDGPUFixFunctionBitcasts();
+}
+
+bool AMDGPUFixFunctionBitcasts::runOnModule(Module &M) {
+ Modified = false;
+ visit(M);
+ return Modified;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index ba735390f679..59bb2a16e0f3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -122,15 +122,14 @@ def : GISelVop2CommutePat <sra, V_ASHRREV_I32_e32, i32>;
}
def : GISelVop3Pat2CommutePat <sra, V_ASHRREV_I32_e64, i32>;
-// FIXME: Select directly to _e32 so we don't need to deal with modifiers.
// FIXME: We can't re-use SelectionDAG patterns here because they match
// against a custom SDNode and we would need to create a generic machine
// instruction that is equivalent to the custom SDNode. This would also require
// us to custom legalize the intrinsic to the new generic machine instruction,
// but I can't get custom legalizing of intrinsic to work and I'm not sure if
// this is even supported yet.
-defm : GISelVop2IntrPat <
- int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e32, v2f16, f32>;
+def : GISelVop3Pat2ModsPat <
+ int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e64, v2f16, f32>;
defm : GISelVop2IntrPat <int_maxnum, V_MAX_F32_e32, f32>;
def : GISelVop3Pat2ModsPat <int_maxnum, V_MAX_F64, f64>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 3a58c6c6a29f..6eab59ab4e09 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -16,34 +16,38 @@ namespace AMDGPU {
enum PartialMappingIdx {
None = - 1,
- PM_SGPR1 = 0,
- PM_SGPR16 = 4,
- PM_SGPR32 = 5,
- PM_SGPR64 = 6,
- PM_SGPR128 = 7,
- PM_SGPR256 = 8,
- PM_SGPR512 = 9,
- PM_VGPR1 = 10,
- PM_VGPR16 = 14,
- PM_VGPR32 = 15,
- PM_VGPR64 = 16,
- PM_VGPR128 = 17,
- PM_VGPR256 = 18,
- PM_VGPR512 = 19,
- PM_SGPR96 = 20,
- PM_VGPR96 = 21
+ PM_SGPR1 = 2,
+ PM_SGPR16 = 6,
+ PM_SGPR32 = 7,
+ PM_SGPR64 = 8,
+ PM_SGPR128 = 9,
+ PM_SGPR256 = 10,
+ PM_SGPR512 = 11,
+ PM_VGPR1 = 12,
+ PM_VGPR16 = 16,
+ PM_VGPR32 = 17,
+ PM_VGPR64 = 18,
+ PM_VGPR128 = 19,
+ PM_VGPR256 = 20,
+ PM_VGPR512 = 21,
+ PM_SGPR96 = 22,
+ PM_VGPR96 = 23
};
const RegisterBankInfo::PartialMapping PartMappings[] {
// StartIdx, Length, RegBank
{0, 1, SCCRegBank},
+ {0, 1, VCCRegBank},
+
+ {0, 1, SGPRRegBank}, // SGPR begin
{0, 16, SGPRRegBank},
{0, 32, SGPRRegBank},
{0, 64, SGPRRegBank},
{0, 128, SGPRRegBank},
{0, 256, SGPRRegBank},
{0, 512, SGPRRegBank},
- {0, 1, SGPRRegBank},
+
+ {0, 1, VGPRRegBank}, // VGPR begin
{0, 16, VGPRRegBank},
{0, 32, VGPRRegBank},
{0, 64, VGPRRegBank},
@@ -55,33 +59,43 @@ const RegisterBankInfo::PartialMapping PartMappings[] {
};
const RegisterBankInfo::ValueMapping ValMappings[] {
+ // SCC
{&PartMappings[0], 1},
- {nullptr, 0},
- {nullptr, 0},
- {nullptr, 0},
+
+ // VCC
{&PartMappings[1], 1},
+
+ // SGPRs
{&PartMappings[2], 1},
+ {nullptr, 0}, // Illegal power of 2 sizes
+ {nullptr, 0},
+ {nullptr, 0},
{&PartMappings[3], 1},
{&PartMappings[4], 1},
{&PartMappings[5], 1},
{&PartMappings[6], 1},
{&PartMappings[7], 1},
+ {&PartMappings[8], 1},
+
+ // VGPRs
+ {&PartMappings[9], 1},
{nullptr, 0},
{nullptr, 0},
{nullptr, 0},
- {&PartMappings[8], 1},
- {&PartMappings[9], 1},
{&PartMappings[10], 1},
{&PartMappings[11], 1},
{&PartMappings[12], 1},
{&PartMappings[13], 1},
{&PartMappings[14], 1},
- {&PartMappings[15], 1}
+ {&PartMappings[15], 1},
+ {&PartMappings[16], 1},
+ {&PartMappings[17], 1}
};
enum ValueMappingIdx {
- SGPRStartIdx = 0,
- VGPRStartIdx = 10
+ SCCStartIdx = 0,
+ SGPRStartIdx = 2,
+ VGPRStartIdx = 12
};
const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
@@ -89,16 +103,28 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
unsigned Idx;
switch (Size) {
case 1:
- Idx = BankID == AMDGPU::SCCRegBankID ? PM_SGPR1 : PM_VGPR1;
+ if (BankID == AMDGPU::SCCRegBankID)
+ return &ValMappings[0];
+ if (BankID == AMDGPU::VCCRegBankID)
+ return &ValMappings[1];
+
+ // 1-bit values not from a compare etc.
+ Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR1 : PM_VGPR1;
break;
case 96:
+ assert(BankID != AMDGPU::VCCRegBankID);
Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR96 : PM_VGPR96;
break;
default:
+ assert(BankID != AMDGPU::VCCRegBankID);
Idx = BankID == AMDGPU::VGPRRegBankID ? VGPRStartIdx : SGPRStartIdx;
Idx += Log2_32_Ceil(Size);
break;
}
+
+ assert(Log2_32_Ceil(Size) == Log2_32_Ceil(ValMappings[Idx].BreakDown->Length));
+ assert(BankID == ValMappings[Idx].BreakDown->RegBank->getID());
+
return &ValMappings[Idx];
}
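
For the default case the index is simply the bank's start index plus Log2_32_Ceil of the bit width; two examples worked out against the tables above:

// 64-bit SGPR value: Idx = SGPRStartIdx + Log2_32_Ceil(64) = 2 + 6 = 8 = PM_SGPR64,
//                    and ValMappings[8] refers to {0, 64, SGPRRegBank}.
// 32-bit VGPR value: Idx = VGPRStartIdx + Log2_32_Ceil(32) = 12 + 5 = 17 = PM_VGPR32,
//                    and ValMappings[17] refers to {0, 32, VGPRRegBank}.
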
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 01ef346f74ee..c38b0e61558b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -16,6 +16,7 @@
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIMachineFunctionInfo.h"
#include "SIProgramInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -36,11 +37,14 @@ static cl::opt<bool> VerifyHSAMetadata(
namespace AMDGPU {
namespace HSAMD {
-void MetadataStreamer::dump(StringRef HSAMetadataString) const {
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV2
+//===----------------------------------------------------------------------===//
+void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
}
-void MetadataStreamer::verify(StringRef HSAMetadataString) const {
+void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata Parser Test: ";
HSAMD::Metadata FromHSAMetadataString;
@@ -63,7 +67,8 @@ void MetadataStreamer::verify(StringRef HSAMetadataString) const {
}
}
-AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
+AccessQualifier
+MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
if (AccQual.empty())
return AccessQualifier::Unknown;
@@ -74,26 +79,29 @@ AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
.Default(AccessQualifier::Default);
}
-AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
+AddressSpaceQualifier
+MetadataStreamerV2::getAddressSpaceQualifier(
unsigned AddressSpace) const {
- if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
+ switch (AddressSpace) {
+ case AMDGPUAS::PRIVATE_ADDRESS:
return AddressSpaceQualifier::Private;
- if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
+ case AMDGPUAS::GLOBAL_ADDRESS:
return AddressSpaceQualifier::Global;
- if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
+ case AMDGPUAS::CONSTANT_ADDRESS:
return AddressSpaceQualifier::Constant;
- if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
+ case AMDGPUAS::LOCAL_ADDRESS:
return AddressSpaceQualifier::Local;
- if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
+ case AMDGPUAS::FLAT_ADDRESS:
return AddressSpaceQualifier::Generic;
- if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
+ case AMDGPUAS::REGION_ADDRESS:
return AddressSpaceQualifier::Region;
-
- llvm_unreachable("Unknown address space qualifier");
+ default:
+ return AddressSpaceQualifier::Unknown;
+ }
}
-ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
- StringRef BaseTypeName) const {
+ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const {
if (TypeQual.find("pipe") != StringRef::npos)
return ValueKind::Pipe;
@@ -114,13 +122,13 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
.Case("queue_t", ValueKind::Queue)
.Default(isa<PointerType>(Ty) ?
(Ty->getPointerAddressSpace() ==
- AMDGPUASI.LOCAL_ADDRESS ?
+ AMDGPUAS::LOCAL_ADDRESS ?
ValueKind::DynamicSharedPointer :
ValueKind::GlobalBuffer) :
ValueKind::ByValue);
}
-ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
+ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
auto Signed = !TypeName.startswith("u");
@@ -152,7 +160,7 @@ ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
}
}
-std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
+std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
if (!Signed)
@@ -189,8 +197,8 @@ std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
}
}
-std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
- MDNode *Node) const {
+std::vector<uint32_t>
+MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
std::vector<uint32_t> Dims;
if (Node->getNumOperands() != 3)
return Dims;
@@ -200,9 +208,9 @@ std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
return Dims;
}
-Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const {
+Kernel::CodeProps::Metadata
+MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
@@ -229,9 +237,9 @@ Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
return HSACodeProps;
}
-Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const {
+Kernel::DebugProps::Metadata
+MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
@@ -251,14 +259,14 @@ Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
return HSADebugProps;
}
-void MetadataStreamer::emitVersion() {
+void MetadataStreamerV2::emitVersion() {
auto &Version = HSAMetadata.mVersion;
Version.push_back(VersionMajor);
Version.push_back(VersionMinor);
}
-void MetadataStreamer::emitPrintf(const Module &Mod) {
+void MetadataStreamerV2::emitPrintf(const Module &Mod) {
auto &Printf = HSAMetadata.mPrintf;
auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
@@ -270,7 +278,7 @@ void MetadataStreamer::emitPrintf(const Module &Mod) {
Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
}
-void MetadataStreamer::emitKernelLanguage(const Function &Func) {
+void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
auto &Kernel = HSAMetadata.mKernels.back();
// TODO: What about other languages?
@@ -288,7 +296,7 @@ void MetadataStreamer::emitKernelLanguage(const Function &Func) {
mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
}
-void MetadataStreamer::emitKernelAttrs(const Function &Func) {
+void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
if (auto Node = Func.getMetadata("reqd_work_group_size"))
@@ -306,14 +314,14 @@ void MetadataStreamer::emitKernelAttrs(const Function &Func) {
}
}
-void MetadataStreamer::emitKernelArgs(const Function &Func) {
+void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
for (auto &Arg : Func.args())
emitKernelArg(Arg);
emitHiddenKernelArgs(Func);
}
-void MetadataStreamer::emitKernelArg(const Argument &Arg) {
+void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
auto Func = Arg.getParent();
auto ArgNo = Arg.getArgNo();
const MDNode *Node;
@@ -355,7 +363,7 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) {
unsigned PointeeAlign = 0;
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
- if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
+ if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
PointeeAlign = Arg.getParamAlignment();
if (PointeeAlign == 0)
PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
@@ -366,12 +374,12 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) {
PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
}
-void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
- ValueKind ValueKind,
- unsigned PointeeAlign,
- StringRef Name,
- StringRef TypeName, StringRef BaseTypeName,
- StringRef AccQual, StringRef TypeQual) {
+void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
+ ValueKind ValueKind,
+ unsigned PointeeAlign, StringRef Name,
+ StringRef TypeName,
+ StringRef BaseTypeName,
+ StringRef AccQual, StringRef TypeQual) {
HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
@@ -384,7 +392,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
Arg.mPointeeAlign = PointeeAlign;
if (auto PtrTy = dyn_cast<PointerType>(Ty))
- Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
+ Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
Arg.mAccQual = getAccessQualifier(AccQual);
@@ -404,7 +412,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
}
}
-void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
+void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
int HiddenArgNumBytes =
getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
@@ -422,7 +430,7 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
- AMDGPUASI.GLOBAL_ADDRESS);
+ AMDGPUAS::GLOBAL_ADDRESS);
// Emit "printf buffer" argument if printf is used, otherwise emit dummy
// "none" argument.
@@ -446,13 +454,16 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
}
}
-void MetadataStreamer::begin(const Module &Mod) {
- AMDGPUASI = getAMDGPUAS(Mod);
+bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+ return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
+}
+
+void MetadataStreamerV2::begin(const Module &Mod) {
emitVersion();
emitPrintf(Mod);
}
-void MetadataStreamer::end() {
+void MetadataStreamerV2::end() {
std::string HSAMetadataString;
if (toString(HSAMetadata, HSAMetadataString))
return;
@@ -463,7 +474,8 @@ void MetadataStreamer::end() {
verify(HSAMetadataString);
}
-void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
+void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
return;
@@ -483,6 +495,505 @@ void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo
HSAMetadata.mKernels.back().mDebugProps = DebugProps;
}
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV3
+//===----------------------------------------------------------------------===//
+
+void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
+ errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
+}
+
+void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
+ errs() << "AMDGPU HSA Metadata Parser Test: ";
+
+ std::shared_ptr<msgpack::Node> FromHSAMetadataString =
+ std::make_shared<msgpack::MapNode>();
+
+ yaml::Input YIn(HSAMetadataString);
+ YIn >> FromHSAMetadataString;
+ if (YIn.error()) {
+ errs() << "FAIL\n";
+ return;
+ }
+
+ std::string ToHSAMetadataString;
+ raw_string_ostream StrOS(ToHSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << FromHSAMetadataString;
+
+ errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
+ if (HSAMetadataString != ToHSAMetadataString) {
+ errs() << "Original input: " << HSAMetadataString << '\n'
+ << "Produced output: " << StrOS.str() << '\n';
+ }
+}
+
+Optional<StringRef>
+MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
+ return StringSwitch<Optional<StringRef>>(AccQual)
+ .Case("read_only", StringRef("read_only"))
+ .Case("write_only", StringRef("write_only"))
+ .Case("read_write", StringRef("read_write"))
+ .Default(None);
+}
+
+Optional<StringRef>
+MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
+ switch (AddressSpace) {
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ return StringRef("private");
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ return StringRef("global");
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ return StringRef("constant");
+ case AMDGPUAS::LOCAL_ADDRESS:
+ return StringRef("local");
+ case AMDGPUAS::FLAT_ADDRESS:
+ return StringRef("generic");
+ case AMDGPUAS::REGION_ADDRESS:
+ return StringRef("region");
+ default:
+ return None;
+ }
+}
+
+StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const {
+ if (TypeQual.find("pipe") != StringRef::npos)
+ return "pipe";
+
+ return StringSwitch<StringRef>(BaseTypeName)
+ .Case("image1d_t", "image")
+ .Case("image1d_array_t", "image")
+ .Case("image1d_buffer_t", "image")
+ .Case("image2d_t", "image")
+ .Case("image2d_array_t", "image")
+ .Case("image2d_array_depth_t", "image")
+ .Case("image2d_array_msaa_t", "image")
+ .Case("image2d_array_msaa_depth_t", "image")
+ .Case("image2d_depth_t", "image")
+ .Case("image2d_msaa_t", "image")
+ .Case("image2d_msaa_depth_t", "image")
+ .Case("image3d_t", "image")
+ .Case("sampler_t", "sampler")
+ .Case("queue_t", "queue")
+ .Default(isa<PointerType>(Ty)
+ ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
+ ? "dynamic_shared_pointer"
+ : "global_buffer")
+ : "by_value");
+}
+
+StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ auto Signed = !TypeName.startswith("u");
+ switch (Ty->getIntegerBitWidth()) {
+ case 8:
+ return Signed ? "i8" : "u8";
+ case 16:
+ return Signed ? "i16" : "u16";
+ case 32:
+ return Signed ? "i32" : "u32";
+ case 64:
+ return Signed ? "i64" : "u64";
+ default:
+ return "struct";
+ }
+ }
+ case Type::HalfTyID:
+ return "f16";
+ case Type::FloatTyID:
+ return "f32";
+ case Type::DoubleTyID:
+ return "f64";
+ case Type::PointerTyID:
+ return getValueType(Ty->getPointerElementType(), TypeName);
+ case Type::VectorTyID:
+ return getValueType(Ty->getVectorElementType(), TypeName);
+ default:
+ return "struct";
+ }
+}
+
+std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ if (!Signed)
+ return (Twine('u') + getTypeName(Ty, true)).str();
+
+ auto BitWidth = Ty->getIntegerBitWidth();
+ switch (BitWidth) {
+ case 8:
+ return "char";
+ case 16:
+ return "short";
+ case 32:
+ return "int";
+ case 64:
+ return "long";
+ default:
+ return (Twine('i') + Twine(BitWidth)).str();
+ }
+ }
+ case Type::HalfTyID:
+ return "half";
+ case Type::FloatTyID:
+ return "float";
+ case Type::DoubleTyID:
+ return "double";
+ case Type::VectorTyID: {
+ auto VecTy = cast<VectorType>(Ty);
+ auto ElTy = VecTy->getElementType();
+ auto NumElements = VecTy->getVectorNumElements();
+ return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
+ }
+ default:
+ return "unknown";
+ }
+}
+
+std::shared_ptr<msgpack::ArrayNode>
+MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
+ auto Dims = std::make_shared<msgpack::ArrayNode>();
+ if (Node->getNumOperands() != 3)
+ return Dims;
+
+ for (auto &Op : Node->operands())
+ Dims->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op)->getZExtValue()));
+ return Dims;
+}
+
+void MetadataStreamerV3::emitVersion() {
+ auto Version = std::make_shared<msgpack::ArrayNode>();
+ Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
+ Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
+ getRootMetadata("amdhsa.version") = std::move(Version);
+}
+
+void MetadataStreamerV3::emitPrintf(const Module &Mod) {
+ auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
+ if (!Node)
+ return;
+
+ auto Printf = std::make_shared<msgpack::ArrayNode>();
+ for (auto Op : Node->operands())
+ if (Op->getNumOperands())
+ Printf->push_back(std::make_shared<msgpack::ScalarNode>(
+ cast<MDString>(Op->getOperand(0))->getString()));
+ getRootMetadata("amdhsa.printf") = std::move(Printf);
+}
+
+void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
+ msgpack::MapNode &Kern) {
+ // TODO: What about other languages?
+ auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
+ if (!Node || !Node->getNumOperands())
+ return;
+ auto Op0 = Node->getOperand(0);
+ if (Op0->getNumOperands() <= 1)
+ return;
+
+ Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
+ auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
+ LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
+ LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
+ Kern[".language_version"] = std::move(LanguageVersion);
+}
+
+void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
+ msgpack::MapNode &Kern) {
+
+ if (auto Node = Func.getMetadata("reqd_work_group_size"))
+ Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("work_group_size_hint"))
+ Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("vec_type_hint")) {
+ Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
+ cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
+ mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
+ }
+ if (Func.hasFnAttribute("runtime-handle")) {
+ Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
+ Func.getFnAttribute("runtime-handle").getValueAsString().str());
+ }
+}
+
+void MetadataStreamerV3::emitKernelArgs(const Function &Func,
+ msgpack::MapNode &Kern) {
+ unsigned Offset = 0;
+ auto Args = std::make_shared<msgpack::ArrayNode>();
+ for (auto &Arg : Func.args())
+ emitKernelArg(Arg, Offset, *Args);
+
+ emitHiddenKernelArgs(Func, Offset, *Args);
+
+ // TODO: What about other languages?
+ if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
+
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
+ else
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel
+ // is used, otherwise emit dummy "none" arguments.
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+ }
+ }
+
+ Kern[".args"] = std::move(Args);
+}
+
+void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
+ msgpack::ArrayNode &Args) {
+ auto Func = Arg.getParent();
+ auto ArgNo = Arg.getArgNo();
+ const MDNode *Node;
+
+ StringRef Name;
+ Node = Func->getMetadata("kernel_arg_name");
+ if (Node && ArgNo < Node->getNumOperands())
+ Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ else if (Arg.hasName())
+ Name = Arg.getName();
+
+ StringRef TypeName;
+ Node = Func->getMetadata("kernel_arg_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef BaseTypeName;
+ Node = Func->getMetadata("kernel_arg_base_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef AccQual;
+ if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
+ Arg.hasNoAliasAttr()) {
+ AccQual = "read_only";
+ } else {
+ Node = Func->getMetadata("kernel_arg_access_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ }
+
+ StringRef TypeQual;
+ Node = Func->getMetadata("kernel_arg_type_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ Type *Ty = Arg.getType();
+ const DataLayout &DL = Func->getParent()->getDataLayout();
+
+ unsigned PointeeAlign = 0;
+ if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+ if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ PointeeAlign = Arg.getParamAlignment();
+ if (PointeeAlign == 0)
+ PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
+ }
+ }
+
+ emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
+ getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
+ Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
+ TypeQual);
+}
+
+void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
+ StringRef ValueKind, unsigned &Offset,
+ msgpack::ArrayNode &Args,
+ unsigned PointeeAlign, StringRef Name,
+ StringRef TypeName,
+ StringRef BaseTypeName,
+ StringRef AccQual, StringRef TypeQual) {
+ auto ArgPtr = std::make_shared<msgpack::MapNode>();
+ auto &Arg = *ArgPtr;
+
+ if (!Name.empty())
+ Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
+ if (!TypeName.empty())
+ Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
+ auto Size = DL.getTypeAllocSize(Ty);
+ auto Align = DL.getABITypeAlignment(Ty);
+ Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
+ Offset = alignTo(Offset, Align);
+ Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
+ Offset += Size;
+ Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
+ Arg[".value_type"] =
+ std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
+ if (PointeeAlign)
+ Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
+
+ if (auto PtrTy = dyn_cast<PointerType>(Ty))
+ if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
+ Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
+
+ if (auto AQ = getAccessQualifier(AccQual))
+ Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
+
+ // TODO: Emit Arg[".actual_access"].
+
+ SmallVector<StringRef, 1> SplitTypeQuals;
+ TypeQual.split(SplitTypeQuals, " ", -1, false);
+ for (StringRef Key : SplitTypeQuals) {
+ if (Key == "const")
+ Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "restrict")
+ Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "volatile")
+ Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "pipe")
+ Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
+ }
+
+ Args.push_back(std::move(ArgPtr));
+}
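
The running Offset is the kernarg byte offset: it is aligned up to the argument's ABI alignment before being recorded, then advanced by the allocation size. A short worked example, assuming 64-bit global pointers (the usual amdgcn data layout):

// i32 argument:        Size = 4, Align = 4 -> .offset = alignTo(0, 4) = 0, Offset becomes 4
// global i8* argument: Size = 8, Align = 8 -> .offset = alignTo(4, 8) = 8, Offset becomes 16
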
+
+void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
+ unsigned &Offset,
+ msgpack::ArrayNode &Args) {
+ int HiddenArgNumBytes =
+ getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
+
+ if (!HiddenArgNumBytes)
+ return;
+
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ if (HiddenArgNumBytes >= 8)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
+ if (HiddenArgNumBytes >= 16)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
+ if (HiddenArgNumBytes >= 24)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
+
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ if (HiddenArgNumBytes >= 32) {
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
+ else
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ }
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel is
+ // used, otherwise emit dummy "none" arguments.
+ if (HiddenArgNumBytes >= 48) {
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ }
+ }
+}
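
The amdgpu-implicitarg-num-bytes attribute gates how many hidden arguments are appended, and every hidden argument here is 8 bytes (i64 offsets and, assuming 64-bit global pointers, the i8* buffers), so the 8/16/24/32/48 thresholds correspond to a packed layout. For example, with 48 implicit-arg bytes, printf metadata present, and no "calls-enqueue-kernel" attribute, the hidden block looks roughly like this (offsets relative to its 8-byte-aligned start):

// hidden_global_offset_x   i64   +0
// hidden_global_offset_y   i64   +8
// hidden_global_offset_z   i64   +16
// hidden_printf_buffer     i8*   +24
// hidden_none              i8*   +32
// hidden_none              i8*   +40
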
+
+std::shared_ptr<msgpack::MapNode>
+MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ const Function &F = MF.getFunction();
+
+ auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
+ auto &Kern = *HSAKernelProps;
+
+ unsigned MaxKernArgAlign;
+ Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign));
+ Kern[".group_segment_fixed_size"] =
+ std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
+ Kern[".private_segment_fixed_size"] =
+ std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
+ Kern[".kernarg_segment_align"] =
+ std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
+ Kern[".wavefront_size"] =
+ std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
+ Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
+ Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
+ Kern[".max_flat_workgroup_size"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
+ Kern[".sgpr_spill_count"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
+ Kern[".vgpr_spill_count"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
+
+ return HSAKernelProps;
+}
+
+bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+ return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
+}
+
+void MetadataStreamerV3::begin(const Module &Mod) {
+ emitVersion();
+ emitPrintf(Mod);
+ getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
+}
+
+void MetadataStreamerV3::end() {
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << HSAMetadataRoot;
+
+ if (DumpHSAMetadata)
+ dump(StrOS.str());
+ if (VerifyHSAMetadata)
+ verify(StrOS.str());
+}
+
+void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) {
+ auto &Func = MF.getFunction();
+ auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
+
+ assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ Func.getCallingConv() == CallingConv::SPIR_KERNEL);
+
+ auto &KernelsNode = getRootMetadata("amdhsa.kernels");
+ auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
+
+ {
+ auto &Kern = *KernelProps;
+ Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
+ Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
+ (Twine(Func.getName()) + Twine(".kd")).str());
+ emitKernelLanguage(Func, Kern);
+ emitKernelAttrs(Func, Kern);
+ emitKernelArgs(Func, Kern);
+ }
+
+ Kernels->push_back(std::move(KernelProps));
+}
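
Putting the pieces together, each entry pushed onto amdhsa.kernels is a map whose keys come from getHSAKernelProps() plus the code above; roughly (values illustrative, <name> standing for the kernel's function name):

// .name                     <name>
// .symbol                   <name>.kd
// .kernarg_segment_size, .group_segment_fixed_size, .private_segment_fixed_size,
// .kernarg_segment_align, .wavefront_size, .sgpr_count, .vgpr_count,
// .max_flat_workgroup_size, .sgpr_spill_count, .vgpr_spill_count
// .language / .language_version   (OpenCL modules only)
// .reqd_workgroup_size, .workgroup_size_hint, .vec_type_hint, .device_enqueue_symbol (optional)
// .args                     array of per-argument maps (.name, .size, .offset, .value_kind, ...)
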
+
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 3424c956d781..afc09baf952d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -19,10 +19,12 @@
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/Support/AMDGPUMetadata.h"
namespace llvm {
+class AMDGPUTargetStreamer;
class Argument;
class DataLayout;
class Function;
@@ -34,10 +36,94 @@ class Type;
namespace AMDGPU {
namespace HSAMD {
-class MetadataStreamer final {
+class MetadataStreamer {
+public:
+ virtual ~MetadataStreamer() {}
+
+ virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0;
+
+ virtual void begin(const Module &Mod) = 0;
+
+ virtual void end() = 0;
+
+ virtual void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) = 0;
+};
+
+class MetadataStreamerV3 final : public MetadataStreamer {
+private:
+ std::shared_ptr<msgpack::Node> HSAMetadataRoot =
+ std::make_shared<msgpack::MapNode>();
+
+ void dump(StringRef HSAMetadataString) const;
+
+ void verify(StringRef HSAMetadataString) const;
+
+ Optional<StringRef> getAccessQualifier(StringRef AccQual) const;
+
+ Optional<StringRef> getAddressSpaceQualifier(unsigned AddressSpace) const;
+
+ StringRef getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const;
+
+ StringRef getValueType(Type *Ty, StringRef TypeName) const;
+
+ std::string getTypeName(Type *Ty, bool Signed) const;
+
+ std::shared_ptr<msgpack::ArrayNode>
+ getWorkGroupDimensions(MDNode *Node) const;
+
+ std::shared_ptr<msgpack::MapNode>
+ getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const;
+
+ void emitVersion();
+
+ void emitPrintf(const Module &Mod);
+
+ void emitKernelLanguage(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelAttrs(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelArgs(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelArg(const Argument &Arg, unsigned &Offset,
+ msgpack::ArrayNode &Args);
+
+ void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind,
+ unsigned &Offset, msgpack::ArrayNode &Args,
+ unsigned PointeeAlign = 0, StringRef Name = "",
+ StringRef TypeName = "", StringRef BaseTypeName = "",
+ StringRef AccQual = "", StringRef TypeQual = "");
+
+ void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
+ msgpack::ArrayNode &Args);
+
+ std::shared_ptr<msgpack::Node> &getRootMetadata(StringRef Key) {
+ return (*cast<msgpack::MapNode>(HSAMetadataRoot.get()))[Key];
+ }
+
+ std::shared_ptr<msgpack::Node> &getHSAMetadataRoot() {
+ return HSAMetadataRoot;
+ }
+
+public:
+ MetadataStreamerV3() = default;
+ ~MetadataStreamerV3() = default;
+
+ bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
+
+ void begin(const Module &Mod) override;
+
+ void end() override;
+
+ void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) override;
+};
+
+class MetadataStreamerV2 final : public MetadataStreamer {
private:
Metadata HSAMetadata;
- AMDGPUAS AMDGPUASI;
void dump(StringRef HSAMetadataString) const;
@@ -45,7 +131,7 @@ private:
AccessQualifier getAccessQualifier(StringRef AccQual) const;
- AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const;
+ AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const;
ValueKind getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const;
@@ -83,19 +169,22 @@ private:
void emitHiddenKernelArgs(const Function &Func);
-public:
- MetadataStreamer() = default;
- ~MetadataStreamer() = default;
-
const Metadata &getHSAMetadata() const {
return HSAMetadata;
}
- void begin(const Module &Mod);
+public:
+ MetadataStreamerV2() = default;
+ ~MetadataStreamerV2() = default;
+
+ bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
+
+ void begin(const Module &Mod) override;
- void end();
+ void end() override;
- void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo);
+ void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) override;
};
} // end namespace HSAMD
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 7cb0e12a6809..a0a045e72a58 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -29,7 +29,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -72,14 +72,12 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const GCNSubtarget *Subtarget;
- AMDGPUAS AMDGPUASI;
bool EnableLateStructurizeCFG;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
: SelectionDAGISel(*TM, OptLevel) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
~AMDGPUDAGToDAGISel() override = default;
@@ -87,7 +85,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AMDGPUArgumentUsageInfo>();
AU.addRequired<AMDGPUPerfHintAnalysis>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
SelectionDAGISel::getAnalysisUsage(AU);
}
@@ -103,9 +101,12 @@ private:
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
-
+ bool isVGPRImm(const SDNode *N) const;
+ bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
+ MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
+
SDNode *glueCopyToM0(SDNode *N) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
@@ -140,13 +141,6 @@ private:
SDValue &Offset, SDValue &SLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
- bool SelectMUBUFConstant(SDValue Constant,
- SDValue &SOffset,
- SDValue &ImmOffset) const;
- bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
- SDValue &ImmOffset) const;
- bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
- SDValue &ImmOffset, SDValue &VOffset) const;
bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
@@ -224,7 +218,6 @@ protected:
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
const R600Subtarget *Subtarget;
- AMDGPUAS AMDGPUASI;
bool isConstantLoad(const MemSDNode *N, int cbID) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
@@ -232,9 +225,7 @@ class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
SDValue& Offset);
public:
explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
- AMDGPUDAGToDAGISel(TM, OptLevel) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
- }
+ AMDGPUDAGToDAGISel(TM, OptLevel) {}
void Select(SDNode *N) override;
@@ -251,12 +242,12 @@ protected:
} // end anonymous namespace
-INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
+INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
-INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
/// This pass converts a legalized DAG into a AMDGPU-specific
@@ -350,7 +341,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
}
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
- if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
+ if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
!Subtarget->ldsRequiresM0Init())
return N;
@@ -372,6 +363,22 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
+MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
+ EVT VT) const {
+ SDNode *Lo = CurDAG->getMachineNode(
+ AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
+ SDNode *Hi =
+ CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
+ const SDValue Ops[] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+ SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
+
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
+}
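
buildSMovImm64 materializes a 64-bit scalar immediate as two S_MOV_B32 halves glued back together with a REG_SEQUENCE; for example:

// Imm = 0x0000000180000000:
//   lo  = S_MOV_B32 0x80000000    // Imm & 0xFFFFFFFF -> sub0
//   hi  = S_MOV_B32 0x00000001    // Imm >> 32        -> sub1
//   res = REG_SEQUENCE SReg_64, lo, sub0, hi, sub1
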
+
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
switch (NumVectorElts) {
case 1:
@@ -557,19 +564,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
SDLoc DL(N);
- SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
- MVT::i32));
- SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
- const SDValue Ops[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
- SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
- };
-
- ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
- N->getValueType(0), Ops));
+ ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
return;
}
case ISD::LOAD:
@@ -641,6 +636,20 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::ATOMIC_CMP_SWAP:
SelectATOMIC_CMP_SWAP(N);
return;
+ case AMDGPUISD::CVT_PKRTZ_F16_F32:
+ case AMDGPUISD::CVT_PKNORM_I16_F32:
+ case AMDGPUISD::CVT_PKNORM_U16_F32:
+ case AMDGPUISD::CVT_PK_U16_U32:
+ case AMDGPUISD::CVT_PK_I16_I32: {
+ // Hack around using a legal type if f16 is illegal.
+ if (N->getValueType(0) == MVT::i32) {
+ MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
+ N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
+ { N->getOperand(0), N->getOperand(1) });
+ SelectCode(N);
+ return;
+ }
+ }
}
SelectCode(N);
@@ -969,8 +978,6 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
// default case
- // FIXME: This is broken on SI where we still need to check if the base
- // pointer is positive here.
Base = Addr;
Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
@@ -1000,55 +1007,72 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ ConstantSDNode *C1 = nullptr;
+ SDValue N0 = Addr;
if (CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ C1 = cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isUInt<32>(C1->getZExtValue()))
+ N0 = Addr.getOperand(0);
+ else
+ C1 = nullptr;
+ }
+
+ if (N0.getOpcode() == ISD::ADD) {
+ // (add N2, N3) -> addr64, or
+ // (add (add N2, N3), C1) -> addr64
+ SDValue N2 = N0.getOperand(0);
+ SDValue N3 = N0.getOperand(1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
- if (N0.getOpcode() == ISD::ADD) {
- // (add (add N2, N3), C1) -> addr64
- SDValue N2 = N0.getOperand(0);
- SDValue N3 = N0.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ if (N2->isDivergent()) {
+ if (N3->isDivergent()) {
+ // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
+ // addr64, and construct the resource from a 0 address.
+ Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
+ VAddr = N0;
+ } else {
+ // N2 is divergent, N3 is not.
+ Ptr = N3;
+ VAddr = N2;
+ }
+ } else {
+ // N2 is not divergent.
Ptr = N2;
VAddr = N3;
- } else {
- // (add N0, C1) -> offset
- VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Ptr = N0;
- }
-
- if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
- return true;
}
-
- if (isUInt<32>(C1->getZExtValue())) {
- // Illegal offset, store it in soffset.
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
- 0);
- return true;
- }
- }
-
- if (Addr.getOpcode() == ISD::ADD) {
- // (add N0, N1) -> addr64
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ } else if (N0->isDivergent()) {
+ // N0 is divergent. Use it as the addr64, and construct the resource from a
+ // 0 address.
+ Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
+ VAddr = N0;
Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ } else {
+ // N0 -> offset, or
+ // (N0 + C1) -> offset
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = N0;
- VAddr = N1;
+ }
+
+ if (!C1) {
+ // No offset.
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
- // default case -> offset
- VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Ptr = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
+ // Legal offset for instruction.
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ return true;
+ }
+ // Illegal offset, store it in soffset.
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ SOffset =
+ SDValue(CurDAG->getMachineNode(
+ AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
+ 0);
return true;
}
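
The rewritten selection above is driven by divergence of the address components rather than by the shape of the DAG alone; a compact summary of the cases it produces:

// N0 = (add N2, N3), N2 uniform                -> addr64: Ptr = N2, VAddr = N3
// N0 = (add N2, N3), N2 divergent, N3 uniform  -> addr64: Ptr = N3, VAddr = N2
// N0 = (add N2, N3), both divergent            -> addr64: Ptr = 0 (buildSMovImm64), VAddr = N0
// N0 divergent, not an add                     -> addr64: Ptr = 0 (buildSMovImm64), VAddr = N0
// N0 uniform, not an add                       -> offset mode: Ptr = N0, VAddr = 0
// Peeled constant C1: absent -> Offset = 0; legal MUBUF imm -> Offset = C1;
//                     otherwise C1 is materialized into SOffset via S_MOV_B32.
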
@@ -1252,101 +1276,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
- SDValue &SOffset,
- SDValue &ImmOffset) const {
- SDLoc DL(Constant);
- const uint32_t Align = 4;
- const uint32_t MaxImm = alignDown(4095, Align);
- uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
- uint32_t Overflow = 0;
-
- if (Imm > MaxImm) {
- if (Imm <= MaxImm + 64) {
- // Use an SOffset inline constant for 4..64
- Overflow = Imm - MaxImm;
- Imm = MaxImm;
- } else {
- // Try to keep the same value in SOffset for adjacent loads, so that
- // the corresponding register contents can be re-used.
- //
- // Load values with all low-bits (except for alignment bits) set into
- // SOffset, so that a larger range of values can be covered using
- // s_movk_i32.
- //
- // Atomic operations fail to work correctly when individual address
- // components are unaligned, even if their sum is aligned.
- uint32_t High = (Imm + Align) & ~4095;
- uint32_t Low = (Imm + Align) & 4095;
- Imm = Low;
- Overflow = High - Align;
- }
- }
-
- // There is a hardware bug in SI and CI which prevents address clamping in
- // MUBUF instructions from working correctly with SOffsets. The immediate
- // offset is unaffected.
- if (Overflow > 0 &&
- Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
- return false;
-
- ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
-
- if (Overflow <= 64)
- SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
- else
- SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
- 0);
-
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
- SDValue &SOffset,
- SDValue &ImmOffset) const {
- SDLoc DL(Offset);
-
- if (!isa<ConstantSDNode>(Offset))
- return false;
-
- return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
- SDValue &SOffset,
- SDValue &ImmOffset,
- SDValue &VOffset) const {
- SDLoc DL(Offset);
-
- // Don't generate an unnecessary voffset for constant offsets.
- if (isa<ConstantSDNode>(Offset)) {
- SDValue Tmp1, Tmp2;
-
- // When necessary, use a voffset in <= CI anyway to work around a hardware
- // bug.
- if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
- SelectMUBUFConstant(Offset, Tmp1, Tmp2))
- return false;
- }
-
- if (CurDAG->isBaseWithConstantOffset(Offset)) {
- SDValue N0 = Offset.getOperand(0);
- SDValue N1 = Offset.getOperand(1);
- if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
- SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
- VOffset = N0;
- return true;
- }
- }
-
- SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- VOffset = Offset;
-
- return true;
-}
-
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
SDValue &VAddr,
@@ -1525,9 +1454,13 @@ bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
// (add n0, c0)
- Base = N0;
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
- return true;
+ // Don't peel off the offset (c0) if doing so could cause the
+ // base (n0) to become negative.
+ if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
+ Base = N0;
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
+ return true;
+ }
}
if (isa<ConstantSDNode>(Index))
@@ -1768,7 +1701,7 @@ void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
MemSDNode *Mem = cast<MemSDNode>(N);
unsigned AS = Mem->getAddressSpace();
- if (AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::FLAT_ADDRESS) {
SelectCode(N);
return;
}
@@ -1816,9 +1749,8 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
return;
}
- MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
- *MMOs = Mem->getMemOperand();
- CmpSwap->setMemRefs(MMOs, MMOs + 1);
+ MachineMemOperand *MMO = Mem->getMemOperand();
+ CurDAG->setNodeMemRefs(CmpSwap, {MMO});
unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
SDValue Extract
@@ -2117,6 +2049,80 @@ bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
return isExtractHiElt(In, Src);
}
+bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ return false;
+ }
+ const SIRegisterInfo *SIRI =
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIInstrInfo * SII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+ unsigned Limit = 0;
+ bool AllUsesAcceptSReg = true;
+ for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+ Limit < 10 && U != E; ++U, ++Limit) {
+ const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+
+ // If the register class is unknown, the use may still be a register class
+ // that needs to be an SGPR, e.g. an inline asm constraint.
+ if (!RC || SIRI->isSGPRClass(RC))
+ return false;
+
+ if (RC != &AMDGPU::VS_32RegClass) {
+ AllUsesAcceptSReg = false;
+ SDNode * User = *U;
+ if (User->isMachineOpcode()) {
+ unsigned Opc = User->getMachineOpcode();
+ MCInstrDesc Desc = SII->get(Opc);
+ if (Desc.isCommutable()) {
+ unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
+ unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
+ unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
+ const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
+ if (CommutedRC == &AMDGPU::VS_32RegClass)
+ AllUsesAcceptSReg = true;
+ }
+ }
+ }
+ // If "AllUsesAcceptSReg == false" so far we haven't suceeded
+ // commuting current user. This means have at least one use
+ // that strictly require VGPR. Thus, we will not attempt to commute
+ // other user instructions.
+ if (!AllUsesAcceptSReg)
+ break;
+ }
+ }
+ return !AllUsesAcceptSReg && (Limit < 10);
+}
+
+bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
+ auto Ld = cast<LoadSDNode>(N);
+
+ return Ld->getAlignment() >= 4 &&
+ (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+ !N->isDivergent()) ||
+ (Subtarget->getScalarizeGlobalBehavior() &&
+ Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ !Ld->isVolatile() && !N->isDivergent() &&
+ static_cast<const SITargetLowering *>(getTargetLowering())
+ ->isMemOpHasNoClobberedMemOperand(N)));
+}
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
@@ -2152,10 +2158,10 @@ bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
- return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
- return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}
bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 21e44e9589d3..6951c915b177 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -128,10 +128,8 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
}
unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
- KnownBits Known;
EVT VT = Op.getValueType();
- DAG.computeKnownBits(Op, Known);
-
+ KnownBits Known = DAG.computeKnownBits(Op);
return VT.getSizeInBits() - Known.countMinLeadingZeros();
}
@@ -146,7 +144,6 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::LOAD, MVT::f32, Promote);
@@ -318,6 +315,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG, MVT::f32, Custom);
setOperationAction(ISD::FLOG10, MVT::f32, Custom);
+ setOperationAction(ISD::FEXP, MVT::f32, Custom);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
@@ -450,6 +448,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
@@ -470,6 +469,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
// This causes using an unrolled select operation rather than expansion with
@@ -550,6 +550,8 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case ISD::FMAD:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
case ISD::FSIN:
case ISD::FTRUNC:
case ISD::FRINT:
@@ -562,6 +564,7 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case AMDGPUISD::FMUL_LEGACY:
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY:
+ case AMDGPUISD::FMED3:
return true;
default:
return false;
@@ -650,8 +653,11 @@ bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
}
bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
- ISD::LoadExtType,
+ ISD::LoadExtType ExtTy,
EVT NewVT) const {
+ // TODO: This may be worth removing. Check regression tests for diffs.
+ if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
+ return false;
unsigned NewSize = NewVT.getStoreSizeInBits();
@@ -662,6 +668,18 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
EVT OldVT = N->getValueType(0);
unsigned OldSize = OldVT.getStoreSizeInBits();
+ MemSDNode *MN = cast<MemSDNode>(N);
+ unsigned AS = MN->getAddressSpace();
+ // Do not shrink an aligned scalar load to sub-dword.
+ // Scalar engine cannot do sub-dword loads.
+ if (OldSize >= 32 && NewSize < 32 && MN->getAlignment() >= 4 &&
+ (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ (isa<LoadSDNode>(N) &&
+ AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) &&
+ AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand()))
+ return false;
+
// Don't produce extloads from sub 32-bit types. SI doesn't have scalar
// extloads, so doing one requires using a buffer_load. In cases where we
// still couldn't use a scalar load, using the wider load shouldn't really
@@ -722,7 +740,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
{
const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
if (L->getMemOperand()->getAddrSpace()
- == AMDGPUASI.CONSTANT_ADDRESS_32BIT)
+ == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return true;
return false;
}
@@ -1140,6 +1158,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
return LowerFLOG(Op, DAG, 1 / AMDGPU_LOG2E_F);
case ISD::FLOG10:
return LowerFLOG(Op, DAG, AMDGPU_LN2_F / AMDGPU_LN10_F);
+ case ISD::FEXP:
+ return lowerFEXP(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
@@ -1188,8 +1208,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
- if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
- G->getAddressSpace() == AMDGPUASI.REGION_ADDRESS) {
+ if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isEntryFunction()) {
const Function &Fn = DAG.getMachineFunction().getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
@@ -2213,6 +2233,34 @@ SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
}
+// Return M_LOG2E of appropriate type
+static SDValue getLog2EVal(SelectionDAG &DAG, const SDLoc &SL, EVT VT) {
+ switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ return DAG.getConstantFP(1.44269504088896340735992468100189214f, SL, VT);
+ case MVT::f16:
+ return DAG.getConstantFP(
+ APFloat(APFloat::IEEEhalf(), "1.44269504088896340735992468100189214"),
+ SL, VT);
+ case MVT::f64:
+ return DAG.getConstantFP(
+ APFloat(APFloat::IEEEdouble(), "0x1.71547652b82fep+0"), SL, VT);
+ default:
+ llvm_unreachable("unsupported fp type");
+ }
+}
+
+// exp2(M_LOG2E_F * f);
+SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ const SDValue K = getLog2EVal(DAG, SL, VT);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags());
+ return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags());
+}
+
static bool isCtlzOpc(unsigned Opc) {
return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
}
@@ -2669,21 +2717,33 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) {
AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24;
}
-static bool simplifyI24(SDNode *Node24, unsigned OpIdx,
- TargetLowering::DAGCombinerInfo &DCI) {
-
+static SDValue simplifyI24(SDNode *Node24,
+ TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
- SDValue Op = Node24->getOperand(OpIdx);
+ SDValue LHS = Node24->getOperand(0);
+ SDValue RHS = Node24->getOperand(1);
+
+ APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
+
+ // First try to simplify using GetDemandedBits which allows the operands to
+ // have other uses, but will only perform simplifications that involve
+ // bypassing some nodes for this user.
+ SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded);
+ SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded);
+ if (DemandedLHS || DemandedRHS)
+ return DAG.getNode(Node24->getOpcode(), SDLoc(Node24), Node24->getVTList(),
+ DemandedLHS ? DemandedLHS : LHS,
+ DemandedRHS ? DemandedRHS : RHS);
+
+ // Now try SimplifyDemandedBits which can simplify the nodes used by our
+ // operands if this node is the only user.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = Op.getValueType();
+ if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
+ return SDValue(Node24, 0);
+ if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))
+ return SDValue(Node24, 0);
- APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
- if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO))
- return true;
-
- return false;
+ return SDValue();
}
template <typename IntTy>
@@ -2920,8 +2980,7 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
// shl (ext x) => zext (shl x), if shift does not overflow int
if (VT != MVT::i64)
break;
- KnownBits Known;
- DAG.computeKnownBits(X, Known);
+ KnownBits Known = DAG.computeKnownBits(X);
unsigned LZ = Known.countMinLeadingZeros();
if (LZ < RHSVal)
break;
@@ -3080,8 +3139,7 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
Src.getOpcode() == ISD::SRA ||
Src.getOpcode() == ISD::SHL)) {
SDValue Amt = Src.getOperand(1);
- KnownBits Known;
- DAG.computeKnownBits(Amt, Known);
+ KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
(Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size))) {
@@ -3233,8 +3291,8 @@ SDValue AMDGPUTargetLowering::performMulLoHi24Combine(
SelectionDAG &DAG = DCI.DAG;
// Simplify demanded bits before splitting into multiple users.
- if (simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI))
- return SDValue();
+ if (SDValue V = simplifyI24(N, DCI))
+ return V;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3449,9 +3507,27 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
}
-static bool isConstantFPZero(SDValue N) {
- if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
- return C->isZero() && !C->isNegative();
+static bool isInv2Pi(const APFloat &APF) {
+ static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
+ static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
+ static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));
+
+ return APF.bitwiseIsEqual(KF16) ||
+ APF.bitwiseIsEqual(KF32) ||
+ APF.bitwiseIsEqual(KF64);
+}
+
+// 0 and 1.0 / (2.0 * pi) do not have inline immediates, so there is an
+// additional cost to negate them.
+bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
+ if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) {
+ if (C->isZero() && !C->isNegative())
+ return true;
+
+ if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))
+ return true;
+ }
+
return false;
}
@@ -3461,6 +3537,10 @@ static unsigned inverseMinMax(unsigned Opc) {
return ISD::FMINNUM;
case ISD::FMINNUM:
return ISD::FMAXNUM;
+ case ISD::FMAXNUM_IEEE:
+ return ISD::FMINNUM_IEEE;
+ case ISD::FMINNUM_IEEE:
+ return ISD::FMAXNUM_IEEE;
case AMDGPUISD::FMAX_LEGACY:
return AMDGPUISD::FMIN_LEGACY;
case AMDGPUISD::FMIN_LEGACY:
@@ -3566,6 +3646,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
}
case ISD::FMAXNUM:
case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMIN_LEGACY: {
// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
@@ -3577,9 +3659,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDValue RHS = N0.getOperand(1);
// 0 doesn't have a negated inline immediate.
- // TODO: Shouldn't fold 1/2pi either, and should be generalized to other
- // operations.
- if (isConstantFPZero(RHS))
+ // TODO: This constant check should be generalized to other operations.
+ if (isConstantCostlierToNegate(RHS))
return SDValue();
SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
@@ -3591,6 +3672,16 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
}
+ case AMDGPUISD::FMED3: {
+ SDValue Ops[3];
+ for (unsigned I = 0; I < 3; ++I)
+ Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
+
+ SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
+ if (!N0.hasOneUse())
+ DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
+ return Res;
+ }
case ISD::FP_EXTEND:
case ISD::FTRUNC:
case ISD::FRINT:
@@ -3737,9 +3828,10 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
if (Src.getValueType() == MVT::i64) {
SDLoc SL(N);
uint64_t CVal = C->getZExtValue();
- return DAG.getNode(ISD::BUILD_VECTOR, SL, DestVT,
- DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
- DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
+ DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
}
}
@@ -3786,9 +3878,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::MUL_U24:
case AMDGPUISD::MULHI_I24:
case AMDGPUISD::MULHI_U24: {
- // If the first call to simplify is successfull, then N may end up being
- // deleted, so we shouldn't call simplifyI24 again.
- simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI);
+ if (SDValue V = simplifyI24(N, DCI))
+ return V;
return SDValue();
}
case AMDGPUISD::MUL_LOHI_I24:
@@ -3943,13 +4034,12 @@ SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
- SDValue StackPtr,
SDValue ArgVal,
int64_t Offset) const {
MachineFunction &MF = DAG.getMachineFunction();
MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
- SDValue Ptr = DAG.getObjectPtrOffset(SL, StackPtr, Offset);
+ SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, 4,
MachineMemOperand::MODereferenceable);
return Store;
@@ -4111,6 +4201,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_LOAD)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
+ NODE_NAME_CASE(SBUFFER_LOAD)
NODE_NAME_CASE(BUFFER_STORE)
NODE_NAME_CASE(BUFFER_STORE_FORMAT)
NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
@@ -4210,33 +4301,42 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
}
case AMDGPUISD::MUL_U24:
case AMDGPUISD::MUL_I24: {
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(Op.getOperand(0), LHSKnown, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(1), RHSKnown, Depth + 1);
-
+ KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
unsigned TrailZ = LHSKnown.countMinTrailingZeros() +
RHSKnown.countMinTrailingZeros();
Known.Zero.setLowBits(std::min(TrailZ, 32u));
- unsigned LHSValBits = 32 - std::max(LHSKnown.countMinSignBits(), 8u);
- unsigned RHSValBits = 32 - std::max(RHSKnown.countMinSignBits(), 8u);
- unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
- if (MaxValBits >= 32)
- break;
+ // Truncate to 24 bits.
+ LHSKnown = LHSKnown.trunc(24);
+ RHSKnown = RHSKnown.trunc(24);
+
bool Negative = false;
if (Opc == AMDGPUISD::MUL_I24) {
- bool LHSNegative = !!(LHSKnown.One & (1 << 23));
- bool LHSPositive = !!(LHSKnown.Zero & (1 << 23));
- bool RHSNegative = !!(RHSKnown.One & (1 << 23));
- bool RHSPositive = !!(RHSKnown.Zero & (1 << 23));
+ unsigned LHSValBits = 24 - LHSKnown.countMinSignBits();
+ unsigned RHSValBits = 24 - RHSKnown.countMinSignBits();
+ unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
+ if (MaxValBits >= 32)
+ break;
+ bool LHSNegative = LHSKnown.isNegative();
+ bool LHSPositive = LHSKnown.isNonNegative();
+ bool RHSNegative = RHSKnown.isNegative();
+ bool RHSPositive = RHSKnown.isNonNegative();
if ((!LHSNegative && !LHSPositive) || (!RHSNegative && !RHSPositive))
break;
Negative = (LHSNegative && RHSPositive) || (LHSPositive && RHSNegative);
- }
- if (Negative)
- Known.One.setHighBits(32 - MaxValBits);
- else
+ if (Negative)
+ Known.One.setHighBits(32 - MaxValBits);
+ else
+ Known.Zero.setHighBits(32 - MaxValBits);
+ } else {
+ unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros();
+ unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros();
+ unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
+ if (MaxValBits >= 32)
+ break;
Known.Zero.setHighBits(32 - MaxValBits);
+ }
break;
}
case AMDGPUISD::PERM: {
@@ -4244,9 +4344,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
if (!CMask)
return;
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(Op.getOperand(0), LHSKnown, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(1), RHSKnown, Depth + 1);
+ KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
unsigned Sel = CMask->getZExtValue();
for (unsigned I = 0; I < 32; I += 8) {
@@ -4320,3 +4419,107 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
}
+
+bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case AMDGPUISD::FMIN_LEGACY:
+ case AMDGPUISD::FMAX_LEGACY: {
+ if (SNaN)
+ return true;
+
+ // TODO: Can check no nans on one of the operands for each one, but which
+ // one?
+ return false;
+ }
+ case AMDGPUISD::FMUL_LEGACY:
+ case AMDGPUISD::CVT_PKRTZ_F16_F32: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case AMDGPUISD::FMED3:
+ case AMDGPUISD::FMIN3:
+ case AMDGPUISD::FMAX3:
+ case AMDGPUISD::FMAD_FTZ: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3:
+ return true;
+
+ case AMDGPUISD::RCP:
+ case AMDGPUISD::RSQ:
+ case AMDGPUISD::RCP_LEGACY:
+ case AMDGPUISD::RSQ_LEGACY:
+ case AMDGPUISD::RSQ_CLAMP: {
+ if (SNaN)
+ return true;
+
+ // TODO: Need an is-known-positive check.
+ return false;
+ }
+ case AMDGPUISD::LDEXP:
+ case AMDGPUISD::FRACT: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case AMDGPUISD::DIV_SCALE:
+ case AMDGPUISD::DIV_FMAS:
+ case AMDGPUISD::DIV_FIXUP:
+ case AMDGPUISD::TRIG_PREOP:
+ // TODO: Refine on operands.
+ return SNaN;
+ case AMDGPUISD::SIN_HW:
+ case AMDGPUISD::COS_HW: {
+ // TODO: Need check for infinity
+ return SNaN;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID
+ = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ // TODO: Handle more intrinsics
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_cubeid:
+ return true;
+
+ case Intrinsic::amdgcn_frexp_mant: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case Intrinsic::amdgcn_cvt_pkrtz: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+ case Intrinsic::amdgcn_fdot2:
+ // TODO: Refine on operand
+ return SNaN;
+ default:
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
+
+TargetLowering::AtomicExpansionKind
+AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+ if (RMW->getOperation() == AtomicRMWInst::Nand)
+ return AtomicExpansionKind::CmpXChg;
+ return AtomicExpansionKind::None;
+}
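As context for the lowerFEXP() hook added in this file: it rewrites exp(x) as exp2(x * log2(e)), since the hardware only provides a native exp2. The following is a minimal standalone C++ sketch of that identity, an editorial illustration only and not part of the patch:

#include <cmath>
#include <cstdio>

// Illustration only: exp(x) == exp2(x * log2(e)), the identity used by
// AMDGPUTargetLowering::lowerFEXP() above.
int main() {
  const float Log2E = 1.44269504088896340735992468100189214f;
  for (float X : {0.0f, 1.0f, -2.5f, 10.0f})
    std::printf("exp(%g) = %g   exp2(%g * log2e) = %g\n",
                X, std::exp(X), X, std::exp2(X * Log2E));
  return 0;
}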
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index a4c3b413e103..0d22cb2e3e20 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -41,8 +41,6 @@ public:
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
protected:
- AMDGPUAS AMDGPUASI;
-
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
/// Split a vector store into multiple scalar stores.
@@ -58,8 +56,9 @@ protected:
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFLOG(SDValue Op, SelectionDAG &Dag,
+ SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG,
double Log2BaseInverted) const;
+ SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;
@@ -95,6 +94,8 @@ protected:
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isConstantCostlierToNegate(SDValue N) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -246,6 +247,11 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN = false,
+ unsigned Depth = 0) const override;
+
/// Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
///
@@ -279,7 +285,6 @@ public:
SDValue storeStackInputValue(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
- SDValue StackPtr,
SDValue ArgVal,
int64_t Offset) const;
@@ -299,13 +304,11 @@ public:
uint32_t getImplicitParameterOffset(const MachineFunction &MF,
const ImplicitParameter Param) const;
- AMDGPUAS getAMDGPUAS() const {
- return AMDGPUASI;
- }
-
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
}
+
+ AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
};
namespace AMDGPUISD {
@@ -357,6 +360,7 @@ enum NodeType : unsigned {
SIN_HW,
FMAX_LEGACY,
FMIN_LEGACY,
+
FMAX3,
SMAX3,
UMAX3,
@@ -479,6 +483,7 @@ enum NodeType : unsigned {
BUFFER_LOAD,
BUFFER_LOAD_FORMAT,
BUFFER_LOAD_FORMAT_D16,
+ SBUFFER_LOAD,
BUFFER_STORE,
BUFFER_STORE_FORMAT,
BUFFER_STORE_FORMAT_D16,
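As context for the isConstantCostlierToNegate()/isInv2Pi() changes above: the bit patterns 0x3118, 0x3e22f983 and 0x3fc45f306dc9c882 are the half, single and double encodings of 1/(2*pi), which subtargets with hasInv2PiInlineImm() can encode as an inline immediate; negating such an operand loses that encoding. A small standalone check of the single-precision pattern, an illustration only and not part of the patch:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustration only: 0x3e22f983 is the IEEE-754 single encoding of 1/(2*pi),
// one of the values isInv2Pi() compares against.
int main() {
  const double Pi = 3.14159265358979323846;
  const float InvTwoPi = static_cast<float>(0.5 / Pi);
  std::uint32_t Bits;
  std::memcpy(&Bits, &InvTwoPi, sizeof(Bits));
  std::printf("1/(2*pi) = %.9g, bits = 0x%08x\n", InvTwoPi,
              static_cast<unsigned>(Bits));
  // Prints bits = 0x3e22f983 on an IEEE-754 single-precision 'float'.
  return 0;
}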
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index 35dd9eb0a478..945c9acd379a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -44,7 +44,7 @@ ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
cl::desc("Cost of alloca argument"));
// If the amount of scratch memory to eliminate exceeds our ability to allocate
-// it into registers we gain nothing by agressively inlining functions for that
+// it into registers we gain nothing by aggressively inlining functions for that
// heuristic.
static cl::opt<unsigned>
ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
@@ -118,8 +118,6 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
if (!Callee)
return (unsigned)Thres;
- const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*Caller->getParent());
-
// If we have a pointer to private array passed into a function
// it will not be optimized out, leaving scratch usage.
// Increase the inline threshold to allow inlining in this case.
@@ -128,7 +126,7 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
for (Value *PtrArg : CS.args()) {
Type *Ty = PtrArg->getType();
if (!Ty->isPointerTy() ||
- Ty->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
+ Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
PtrArg = GetUnderlyingObject(PtrArg, DL);
if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
@@ -174,18 +172,23 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
Function *Caller = CS.getCaller();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
- if (!Callee || Callee->isDeclaration() || CS.isNoInline() ||
- !TTI.areInlineCompatible(Caller, Callee))
- return llvm::InlineCost::getNever();
+ if (!Callee || Callee->isDeclaration())
+ return llvm::InlineCost::getNever("undefined callee");
+
+ if (CS.isNoInline())
+ return llvm::InlineCost::getNever("noinline");
+
+ if (!TTI.areInlineCompatible(Caller, Callee))
+ return llvm::InlineCost::getNever("incompatible");
if (CS.hasFnAttr(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
- return llvm::InlineCost::getAlways();
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getAlways("alwaysinline viable");
+ return llvm::InlineCost::getNever("alwaysinline unviable");
}
if (isWrapperOnlyCall(CS))
- return llvm::InlineCost::getAlways();
+ return llvm::InlineCost::getAlways("wrapper-only call");
InlineParams LocalParams = Params;
LocalParams.DefaultThreshold = (int)getInlineThreshold(CS);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 7442a59e594f..82644be26563 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -62,18 +62,10 @@ def AMDGPULoopOp : SDTypeProfile<0, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
>;
-def AMDGPUBreakOp : SDTypeProfile<1, 1,
- [SDTCisVT<0, i64>, SDTCisVT<1, i64>]
->;
-
def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
>;
-def AMDGPUElseBreakOp : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>]
->;
-
def AMDGPUAddeSubeOp : SDTypeProfile<2, 3,
[SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>]
>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 219d430fbb39..8eb49d49b2e0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -55,7 +55,6 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
- ,AMDGPUASI(STI.getAMDGPUAS())
{
}
@@ -506,8 +505,8 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
if (!I.hasOneMemOperand())
return false;
- if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
- (*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
+ if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+ (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return false;
if (!isInstrUniform(I))
@@ -631,6 +630,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
return selectImpl(I, CoverageInfo);
case TargetOpcode::G_ADD:
return selectG_ADD(I);
+ case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_BITCAST:
return selectCOPY(I);
case TargetOpcode::G_CONSTANT:
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 68b40b20aca2..449431adc561 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -105,9 +105,6 @@ private:
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
-
-protected:
- AMDGPUAS AMDGPUASI;
};
} // End llvm namespace.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index c9c932ef2f5f..eb8f2002ff2d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -135,6 +135,12 @@ def brtarget : Operand<OtherVT>;
// Misc. PatFrags
//===----------------------------------------------------------------------===//
+class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
+ (ops node:$src0),
+ (op $src0),
+ [{ return N->hasOneUse(); }]
+>;
+
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
(ops node:$src0, node:$src1),
(op $src0, $src1),
@@ -152,13 +158,21 @@ def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
+
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
+
+def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
+def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;
+
+
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]
+def not_oneuse : HasOneUseUnaryOp<not>;
+
def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;
@@ -167,6 +181,9 @@ def shl_oneuse : HasOneUseBinOp<shl>;
def select_oneuse : HasOneUseTernaryOp<select>;
+def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
+def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
+
def srl_16 : PatFrag<
(ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;
@@ -328,37 +345,37 @@ class StoreHi16<SDPatternOperator op> : PatFrag <
>;
class PrivateAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;
class ConstantAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
class LocalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
class GlobalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class GlobalLoadAddress : CodePatPred<[{
auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS;
+ return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
class FlatLoadAddress : CodePatPred<[{
const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS;
+ return AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
class FlatStoreAddress : CodePatPred<[{
const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.GLOBAL_ADDRESS;
+ return AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
@@ -480,7 +497,7 @@ def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
@@ -497,14 +514,14 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
(ops node:$ptr, node:$cmp, node:$swap),
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+ return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
@@ -513,17 +530,17 @@ multiclass global_binary_atomic_op<SDNode atomic_op> {
def "" : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
def _noret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def _ret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
@@ -550,12 +567,12 @@ def atomic_cmp_swap_global : PatFrag<
def atomic_cmp_swap_global_noret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def atomic_cmp_swap_global_ret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
@@ -787,18 +804,30 @@ class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
(BIT_ALIGN $src0, $src0, $src1)
>;
-// This matches 16 permutations of
-// max(min(x, y), min(max(x, y), z))
-class IntMed3Pat<Instruction med3Inst,
+multiclass IntMed3Pat<Instruction med3Inst,
+ SDPatternOperator min,
SDPatternOperator max,
- SDPatternOperator max_oneuse,
SDPatternOperator min_oneuse,
- ValueType vt = i32> : AMDGPUPat<
+ SDPatternOperator max_oneuse,
+ ValueType vt = i32> {
+
+ // This matches 16 permutations of
+ // min(max(a, b), max(min(a, b), c))
+ def : AMDGPUPat <
+ (min (max_oneuse vt:$src0, vt:$src1),
+ (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
+ (med3Inst vt:$src0, vt:$src1, vt:$src2)
+>;
+
+ // This matches 16 permutations of
+ // max(min(x, y), min(max(x, y), z))
+ def : AMDGPUPat <
(max (min_oneuse vt:$src0, vt:$src1),
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst $src0, $src1, $src2)
>;
-
+}
+
// Special conversion patterns
def cvt_rpi_i32_f32 : PatFrag <
@@ -813,6 +842,7 @@ def cvt_flr_i32_f32 : PatFrag <
[{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
+let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
(add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
!if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
@@ -824,6 +854,7 @@ class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
!if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
(Inst $src0, $src1, $src2))
>;
+} // AddedComplexity.
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
(fdiv FP_ONE, vt:$src),
@@ -834,3 +865,25 @@ class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
(AMDGPUrcp (fsqrt vt:$src)),
(RsqInst $src)
>;
+
+// Instructions which select to the same v_min_f*
+def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
+ [(fminnum_ieee node:$src0, node:$src1),
+ (fminnum node:$src0, node:$src1)]
+>;
+
+// Instructions which select to the same v_max_f*
+def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
+ [(fmaxnum_ieee node:$src0, node:$src1),
+ (fmaxnum node:$src0, node:$src1)]
+>;
+
+def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
+ [(fminnum_ieee_oneuse node:$src0, node:$src1),
+ (fminnum_oneuse node:$src0, node:$src1)]
+>;
+
+def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
+ [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
+ (fmaxnum_oneuse node:$src0, node:$src1)]
+>;
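As context for the IntMed3Pat multiclass above: both expression shapes it matches, min(max(a,b), max(min(a,b),c)) and max(min(a,b), min(max(a,b),c)), compute the median of three values, which is the semantics of the med3 instructions. A brute-force standalone check in C++ (illustration only, not part of the patch; the helper name med3Ref is made up here):

#include <algorithm>
#include <cassert>
#include <cstdio>

// Illustration only: both shapes matched by IntMed3Pat equal the median of
// the three inputs on every tested triple.
static int med3Ref(int A, int B, int C) {
  int V[3] = {A, B, C};
  std::sort(V, V + 3);
  return V[1];
}

int main() {
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B)
      for (int C = -2; C <= 2; ++C) {
        int Form1 = std::min(std::max(A, B), std::max(std::min(A, B), C));
        int Form2 = std::max(std::min(A, B), std::min(std::max(A, B), C));
        assert(Form1 == med3Ref(A, B, C) && Form2 == med3Ref(A, B, C));
      }
  std::printf("both forms equal median(a, b, c) on all tested triples\n");
  return 0;
}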
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
index 896e2055cf62..02108ca3ddd7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
@@ -40,7 +40,7 @@ StringRef AMDGPUIntrinsicInfo::getName(unsigned IntrID,
if (IntrID < Intrinsic::num_intrinsics)
return StringRef();
- assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
+ assert(IntrID < SIIntrinsic::num_AMDGPU_intrinsics &&
"Invalid intrinsic ID");
return IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics];
@@ -91,7 +91,7 @@ Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
= cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
AttributeList AS =
- getAttributes(M->getContext(), static_cast<AMDGPUIntrinsic::ID>(IntrID));
+ getAttributes(M->getContext(), static_cast<SIIntrinsic::ID>(IntrID));
F->setAttributes(AS);
return F;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
index ef42f9a319af..a1a094dded23 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
@@ -20,7 +20,7 @@
namespace llvm {
class TargetMachine;
-namespace AMDGPUIntrinsic {
+namespace SIIntrinsic {
enum ID {
last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
#define GET_INTRINSIC_ENUM_VALUES
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 87b072c9ea20..ef85c1040545 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -32,20 +32,52 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
};
- auto AMDGPUAS = ST.getAMDGPUAS();
-
const LLT S1 = LLT::scalar(1);
- const LLT V2S16 = LLT::vector(2, 16);
-
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT S512 = LLT::scalar(512);
+ const LLT V2S16 = LLT::vector(2, 16);
+ const LLT V4S16 = LLT::vector(4, 16);
+ const LLT V8S16 = LLT::vector(8, 16);
+
+ const LLT V2S32 = LLT::vector(2, 32);
+ const LLT V3S32 = LLT::vector(3, 32);
+ const LLT V4S32 = LLT::vector(4, 32);
+ const LLT V5S32 = LLT::vector(5, 32);
+ const LLT V6S32 = LLT::vector(6, 32);
+ const LLT V7S32 = LLT::vector(7, 32);
+ const LLT V8S32 = LLT::vector(8, 32);
+ const LLT V9S32 = LLT::vector(9, 32);
+ const LLT V10S32 = LLT::vector(10, 32);
+ const LLT V11S32 = LLT::vector(11, 32);
+ const LLT V12S32 = LLT::vector(12, 32);
+ const LLT V13S32 = LLT::vector(13, 32);
+ const LLT V14S32 = LLT::vector(14, 32);
+ const LLT V15S32 = LLT::vector(15, 32);
+ const LLT V16S32 = LLT::vector(16, 32);
+
+ const LLT V2S64 = LLT::vector(2, 64);
+ const LLT V3S64 = LLT::vector(3, 64);
+ const LLT V4S64 = LLT::vector(4, 64);
+ const LLT V5S64 = LLT::vector(5, 64);
+ const LLT V6S64 = LLT::vector(6, 64);
+ const LLT V7S64 = LLT::vector(7, 64);
+ const LLT V8S64 = LLT::vector(8, 64);
+
+ std::initializer_list<LLT> AllS32Vectors =
+ {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
+ V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
+ std::initializer_list<LLT> AllS64Vectors =
+ {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};
+
const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
- const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS.FLAT_ADDRESS);
- const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS.PRIVATE_ADDRESS);
+ const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
+ const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
+
+ const LLT CodePtr = FlatPtr;
const LLT AddrSpaces[] = {
GlobalPtr,
@@ -55,13 +87,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
PrivatePtr
};
+ setAction({G_BRCOND, S1}, Legal);
+
setAction({G_ADD, S32}, Legal);
setAction({G_ASHR, S32}, Legal);
setAction({G_SUB, S32}, Legal);
setAction({G_MUL, S32}, Legal);
- setAction({G_AND, S32}, Legal);
- setAction({G_OR, S32}, Legal);
- setAction({G_XOR, S32}, Legal);
+
+ // FIXME: 64-bit ones only legal for scalar
+ getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
+ .legalFor({S32, S1, S64, V2S32});
+
+ getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
+ G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
+ .legalFor({{S32, S1}});
setAction({G_BITCAST, V2S16}, Legal);
setAction({G_BITCAST, 1, S32}, Legal);
@@ -90,35 +129,80 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
// between these two scenarios.
setAction({G_CONSTANT, S1}, Legal);
- setAction({G_FADD, S32}, Legal);
+ setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
+
+ getActionDefinitionsBuilder(
+ { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA})
+ .legalFor({S32, S64});
+
+ getActionDefinitionsBuilder(G_FPTRUNC)
+ .legalFor({{S32, S64}});
+
+ // Use actual fsub instruction
+ setAction({G_FSUB, S32}, Legal);
+
+ // Must use fadd + fneg
+ setAction({G_FSUB, S64}, Lower);
setAction({G_FCMP, S1}, Legal);
setAction({G_FCMP, 1, S32}, Legal);
setAction({G_FCMP, 1, S64}, Legal);
- setAction({G_FMUL, S32}, Legal);
-
setAction({G_ZEXT, S64}, Legal);
setAction({G_ZEXT, 1, S32}, Legal);
+ setAction({G_SEXT, S64}, Legal);
+ setAction({G_SEXT, 1, S32}, Legal);
+
+ setAction({G_ANYEXT, S64}, Legal);
+ setAction({G_ANYEXT, 1, S32}, Legal);
+
setAction({G_FPTOSI, S32}, Legal);
setAction({G_FPTOSI, 1, S32}, Legal);
setAction({G_SITOFP, S32}, Legal);
setAction({G_SITOFP, 1, S32}, Legal);
+ setAction({G_UITOFP, S32}, Legal);
+ setAction({G_UITOFP, 1, S32}, Legal);
+
setAction({G_FPTOUI, S32}, Legal);
setAction({G_FPTOUI, 1, S32}, Legal);
+ setAction({G_FPOW, S32}, Legal);
+ setAction({G_FEXP2, S32}, Legal);
+ setAction({G_FLOG2, S32}, Legal);
+
+ getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND})
+ .legalFor({S32, S64});
+
for (LLT PtrTy : AddrSpaces) {
LLT IdxTy = LLT::scalar(PtrTy.getSizeInBits());
setAction({G_GEP, PtrTy}, Legal);
setAction({G_GEP, 1, IdxTy}, Legal);
}
+ setAction({G_BLOCK_ADDR, CodePtr}, Legal);
+
setAction({G_ICMP, S1}, Legal);
setAction({G_ICMP, 1, S32}, Legal);
+ setAction({G_CTLZ, S32}, Legal);
+ setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal);
+ setAction({G_CTTZ, S32}, Legal);
+ setAction({G_CTTZ_ZERO_UNDEF, S32}, Legal);
+ setAction({G_BSWAP, S32}, Legal);
+ setAction({G_CTPOP, S32}, Legal);
+
+ getActionDefinitionsBuilder(G_INTTOPTR)
+ .legalIf([](const LegalityQuery &Query) {
+ return true;
+ });
+
+ getActionDefinitionsBuilder(G_PTRTOINT)
+ .legalIf([](const LegalityQuery &Query) {
+ return true;
+ });
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.legalIf([=, &ST](const LegalityQuery &Query) {
@@ -145,6 +229,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
});
+ auto &Atomics = getActionDefinitionsBuilder(
+ {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
+ G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
+ G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
+ G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
+ .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
+ {S64, GlobalPtr}, {S64, LocalPtr}});
+ if (ST.hasFlatAddressSpace()) {
+ Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
+ }
setAction({G_SELECT, S32}, Legal);
setAction({G_SELECT, 1, S1}, Legal);
@@ -180,6 +274,23 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
(Ty1.getSizeInBits() % 32 == 0);
});
+ getActionDefinitionsBuilder(G_BUILD_VECTOR)
+ .legalForCartesianProduct(AllS32Vectors, {S32})
+ .legalForCartesianProduct(AllS64Vectors, {S64})
+ .clampNumElements(0, V16S32, V16S32)
+ .clampNumElements(0, V2S64, V8S64)
+ .minScalarSameAs(1, 0);
+
+ // TODO: Support any combination of v2s32
+ getActionDefinitionsBuilder(G_CONCAT_VECTORS)
+ .legalFor({{V4S32, V2S32},
+ {V8S32, V2S32},
+ {V8S32, V4S32},
+ {V4S64, V2S64},
+ {V4S16, V2S16},
+ {V8S16, V2S16},
+ {V8S16, V4S16}});
+
// Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 7a7ed7a4f065..14e880042691 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1333,8 +1333,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// for OpenCL 2.0 we have only generic implementation of sincos
// function.
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
- const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M);
- nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS.FLAT_ADDRESS);
+ nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
if (!Fsincos) return false;
@@ -1347,7 +1346,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// The allocaInst allocates the memory in the private address space. This needs
// to be bitcast to point to the address space of the cos pointer type.
// In OpenCL 2.0 this is generic, while in 1.2 that is private.
- if (PTy->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
+ if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
P = B.CreateAddrSpaceCast(Alloc, PTy);
CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index f37795e961e8..4fc3fe0f105b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -995,8 +995,10 @@ Function *AMDGPULibFunc::getOrInsertFunction(Module *M,
} else {
AttributeList Attr;
LLVMContext &Ctx = M->getContext();
- Attr.addAttribute(Ctx, AttributeList::FunctionIndex, Attribute::ReadOnly);
- Attr.addAttribute(Ctx, AttributeList::FunctionIndex, Attribute::NoUnwind);
+ Attr = Attr.addAttribute(Ctx, AttributeList::FunctionIndex,
+ Attribute::ReadOnly);
+ Attr = Attr.addAttribute(Ctx, AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
C = M->getOrInsertFunction(FuncName, FuncTy, Attr);
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index c147830e12ed..743dc7a0d00b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -16,7 +16,6 @@
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -84,8 +83,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
return false;
CallInst *KernArgSegment =
- Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, nullptr,
- F.getName() + ".kernarg.segment");
+ Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
+ nullptr, F.getName() + ".kernarg.segment");
KernArgSegment->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
KernArgSegment->addAttribute(AttributeList::ReturnIndex,
@@ -123,14 +122,17 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
VectorType *VT = dyn_cast<VectorType>(ArgTy);
bool IsV3 = VT && VT->getNumElements() == 3;
+ bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();
+
VectorType *V4Ty = nullptr;
int64_t AlignDownOffset = alignDown(EltOffset, 4);
int64_t OffsetDiff = EltOffset - AlignDownOffset;
- unsigned AdjustedAlign = MinAlign(KernArgBaseAlign, AlignDownOffset);
+ unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
+ KernArgBaseAlign);
Value *ArgPtr;
- if (Size < 32 && !ArgTy->isAggregateType()) { // FIXME: Handle aggregate types
+ if (DoShiftOpt) { // FIXME: Handle aggregate types
// Since we don't have sub-dword scalar loads, avoid doing an extload by
// loading earlier than the argument address, and extracting the relevant
// bits.
@@ -148,7 +150,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
} else {
ArgPtr = Builder.CreateConstInBoundsGEP1_64(
KernArgSegment,
- AlignDownOffset,
+ EltOffset,
Arg.getName() + ".kernarg.offset");
ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS),
ArgPtr->getName() + ".cast");
@@ -199,7 +201,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
// TODO: Convert noalias arg to !noalias
- if (Size < 32 && !ArgTy->isAggregateType()) {
+ if (DoShiftOpt) {
Value *ExtractBits = OffsetDiff == 0 ?
Load : Builder.CreateLShr(Load, OffsetDiff * 8);
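As context for the DoShiftOpt path above: for a sub-dword kernel argument, the pass loads the 4-byte-aligned dword that contains the argument and shifts the wanted bits out, rather than emitting a sub-dword extload. The following is a minimal standalone sketch of the addressing arithmetic only, not the IR the pass emits; it assumes a little-endian target (as amdgcn is), and the function name, buffer, and byte offset 6 are hypothetical:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustration only: fetch an i16 argument by loading the containing
// 4-byte-aligned dword and shifting, mirroring AlignDownOffset/OffsetDiff.
static std::uint32_t loadI16Arg(const std::uint8_t *KernArg,
                                std::uint64_t EltOffset) {
  std::uint64_t AlignDownOffset = EltOffset & ~std::uint64_t(3); // alignDown(EltOffset, 4)
  std::uint64_t OffsetDiff = EltOffset - AlignDownOffset;
  std::uint32_t Dword;
  std::memcpy(&Dword, KernArg + AlignDownOffset, sizeof(Dword)); // one aligned dword load
  return (Dword >> (OffsetDiff * 8)) & 0xffff;                   // extract the i16 bits
}

int main() {
  // Hypothetical kernarg segment with an i16 argument at byte offset 6.
  const std::uint8_t KernArg[8] = {0, 0, 0, 0, 0, 0, 0x34, 0x12};
  std::printf("arg = 0x%x\n", loadI16Arg(KernArg, 6)); // prints 0x1234
  return 0;
}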
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 1876dc3f7122..f6bdbf5e9be2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -301,6 +301,26 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
+#ifdef EXPENSIVE_CHECKS
+ // Sanity-check getInstSizeInBytes on explicitly specified CPUs (it cannot
+ // work correctly for the generic CPU).
+ //
+ // The isPseudo check really shouldn't be here, but unfortunately there are
+ // some negative lit tests that depend on being able to continue through
+ // here even when pseudo instructions haven't been lowered.
+ if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU())) {
+ SmallVector<MCFixup, 4> Fixups;
+ SmallVector<char, 16> CodeBytes;
+ raw_svector_ostream CodeStream(CodeBytes);
+
+ std::unique_ptr<MCCodeEmitter> InstEmitter(createSIMCCodeEmitter(
+ *STI.getInstrInfo(), *OutContext.getRegisterInfo(), OutContext));
+ InstEmitter->encodeInstruction(TmpInst, CodeStream, Fixups, STI);
+
+ assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
+ }
+#endif
+
if (STI.dumpCode()) {
// Disassemble instruction/operands to text.
DisasmLines.resize(DisasmLines.size() + 1);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
index 995d9ae3907f..5e0b7d429022 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -42,9 +42,12 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
if (!FirstMI)
return true;
+ const MachineBasicBlock &MBB = *FirstMI->getParent();
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
const MachineOperand *Src2 = TII.getNamedOperand(SecondMI,
AMDGPU::OpName::src2);
- return FirstMI->definesRegister(Src2->getReg());
+ return FirstMI->definesRegister(Src2->getReg(), TRI);
}
default:
return false;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
index b50a2eb8e9e7..2feff14d34a1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -23,7 +23,8 @@ namespace ElfNote {
const char SectionName[] = ".note";
-const char NoteName[] = "AMD";
+const char NoteNameV2[] = "AMD";
+const char NoteNameV3[] = "AMDGPU";
// TODO: Remove this file once we drop code object v2.
enum NoteType{
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 3cfdccc9fe51..e53a8fe7c074 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -99,8 +99,6 @@ private:
const DataLayout *DL;
- AMDGPUAS AS;
-
const TargetLowering *TLI;
void visit(const Function &F);
@@ -267,7 +265,6 @@ void AMDGPUPerfHint::runOnFunction(Function &F) {
const Module &M = *F.getParent();
DL = &M.getDataLayout();
- AS = AMDGPU::getAMDGPUAS(M);
visit(F);
auto Loc = FIM.find(&F);
@@ -306,14 +303,14 @@ bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
if (auto PT = dyn_cast<PointerType>(V->getType())) {
unsigned As = PT->getAddressSpace();
// Flat likely points to global too.
- return As == AS.GLOBAL_ADDRESS || As == AS.FLAT_ADDRESS;
+ return As == AMDGPUAS::GLOBAL_ADDRESS || As == AMDGPUAS::FLAT_ADDRESS;
}
return false;
}
bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
if (auto PT = dyn_cast<PointerType>(V->getType()))
- return PT->getAddressSpace() == AS.LOCAL_ADDRESS;
+ return PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
return false;
}
@@ -346,7 +343,8 @@ AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
if (auto PT = dyn_cast<PointerType>(V->getType())) {
unsigned As = PT->getAddressSpace();
- return As == AS.CONSTANT_ADDRESS || As == AS.CONSTANT_ADDRESS_32BIT;
+ return As == AMDGPUAS::CONSTANT_ADDRESS ||
+ As == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}
return false;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index d341fec6296f..5d087c099184 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -70,13 +70,17 @@ static cl::opt<bool> DisablePromoteAllocaToVector(
cl::desc("Disable promote alloca to vector"),
cl::init(false));
+static cl::opt<bool> DisablePromoteAllocaToLDS(
+ "disable-promote-alloca-to-lds",
+ cl::desc("Disable promote alloca to LDS"),
+ cl::init(false));
+
// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
private:
const TargetMachine *TM;
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
- AMDGPUAS AS;
// FIXME: This should be per-kernel.
uint32_t LocalMemLimit = 0;
@@ -156,8 +160,6 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
if (!ST.isPromoteAllocaEnabled())
return false;
- AS = AMDGPU::getAMDGPUAS(*F.getParent());
-
bool SufficientLDS = hasSufficientLocalMem(F);
bool Changed = false;
BasicBlock &EntryBB = *F.begin();
@@ -238,7 +240,7 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
Type *I32Ty = Type::getInt32Ty(Mod->getContext());
Value *CastDispatchPtr = Builder.CreateBitCast(
- DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));
+ DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
// We could do a single 64-bit load here, but it's likely that the basic
// 32-bit and extract sequence is already present, and it is probably easier
@@ -326,6 +328,10 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
// Currently only handle the case where the Pointer Operand is a GEP.
// Also we could not vectorize volatile or atomic loads.
LoadInst *LI = cast<LoadInst>(Inst);
+ if (isa<AllocaInst>(User) &&
+ LI->getPointerOperandType() == User->getType() &&
+ isa<VectorType>(LI->getType()))
+ return true;
return isa<GetElementPtrInst>(LI->getPointerOperand()) && LI->isSimple();
}
case Instruction::BitCast:
@@ -335,6 +341,10 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
// since it should be canonical form, the User should be a GEP.
// Also we could not vectorize volatile or atomic stores.
StoreInst *SI = cast<StoreInst>(Inst);
+ if (isa<AllocaInst>(User) &&
+ SI->getPointerOperandType() == User->getType() &&
+ isa<VectorType>(SI->getValueOperand()->getType()))
+ return true;
return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && SI->isSimple();
}
default:
@@ -342,14 +352,15 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
}
}
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
if (DisablePromoteAllocaToVector) {
LLVM_DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
return false;
}
- ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
+ Type *AT = Alloca->getAllocatedType();
+ SequentialType *AllocaTy = dyn_cast<SequentialType>(AT);
LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");
@@ -396,7 +407,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
}
}
- VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
+ VectorType *VectorTy = dyn_cast<VectorType>(AllocaTy);
+ if (!VectorTy)
+ VectorTy = arrayTypeToVecType(cast<ArrayType>(AllocaTy));
LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
<< *VectorTy << '\n');
@@ -406,7 +419,10 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
- Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
+ if (Inst->getType() == AT)
+ break;
+
+ Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
@@ -418,9 +434,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
break;
}
case Instruction::Store: {
- Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
-
StoreInst *SI = cast<StoreInst>(Inst);
+ if (SI->getValueOperand()->getType() == AT)
+ break;
+
+ Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
@@ -610,7 +628,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
// we cannot use local memory in the pass.
for (Type *ParamTy : FTy->params()) {
PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
- if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
+ if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
LocalMemLimit = 0;
LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
"local memory disabled.\n");
@@ -627,7 +645,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
// Check how much local memory is being used by global objects
CurrentLocalMemUsage = 0;
for (GlobalVariable &GV : Mod->globals()) {
- if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
+ if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
continue;
for (const User *U : GV.users()) {
@@ -706,9 +724,12 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
- if (tryPromoteAllocaToVector(&I, AS))
+ if (tryPromoteAllocaToVector(&I))
return true; // Promoted to vector.
+ if (DisablePromoteAllocaToLDS)
+ return false;
+
const Function &ContainingFunction = *I.getParent()->getParent();
CallingConv::ID CC = ContainingFunction.getCallingConv();
@@ -775,7 +796,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
Twine(F->getName()) + Twine('.') + I.getName(),
nullptr,
GlobalVariable::NotThreadLocal,
- AS.LOCAL_ADDRESS);
+ AMDGPUAS::LOCAL_ADDRESS);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
GV->setAlignment(I.getAlignment());
@@ -808,7 +829,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
Value *Src0 = CI->getOperand(0);
Type *EltTy = Src0->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
if (isa<ConstantPointerNull>(CI->getOperand(0)))
CI->setOperand(0, ConstantPointerNull::get(NewTy));
@@ -825,7 +846,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
continue;
Type *EltTy = V->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
// FIXME: It doesn't really make sense to try to do this for all
// instructions.
@@ -894,7 +915,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
Type *SrcTy = Src->getType()->getPointerElementType();
Function *ObjectSize = Intrinsic::getDeclaration(Mod,
Intrinsic::objectsize,
- { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
+ { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
);
CallInst *NewCall = Builder.CreateCall(
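The repeated mechanical change in this file, and in the other AMDGPU files below, replaces the per-subtarget AMDGPUAS struct (the old AS.LOCAL_ADDRESS style fields) with the target-wide AMDGPUAS:: enumerators, so address-space numbers can be used without a subtarget or triple in hand. A small sketch of the resulting pattern (hypothetical helper, assuming the enumerators come from the target's AMDGPU.h as elsewhere in this patch):

    #include "llvm/IR/DerivedTypes.h"   // PointerType, Type

    // Build an LDS (local-memory) pointer type without consulting a subtarget;
    // AMDGPUAS::LOCAL_ADDRESS is now a plain compile-time constant.
    static llvm::PointerType *getLDSPtrTy(llvm::Type *EltTy) {
      return llvm::PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
    }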
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 012e4fe200aa..7a760dcf7a90 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -35,7 +35,7 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
: AMDGPUGenRegisterBankInfo(),
TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
- // HACK: Until this is fully tablegen'd
+ // HACK: Until this is fully tablegen'd.
static bool AlreadyInit = false;
if (AlreadyInit)
return;
@@ -74,13 +74,16 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
const RegisterBank &Src,
unsigned Size) const {
if (Dst.getID() == AMDGPU::SGPRRegBankID &&
- Src.getID() == AMDGPU::VGPRRegBankID)
+ Src.getID() == AMDGPU::VGPRRegBankID) {
return std::numeric_limits<unsigned>::max();
+ }
// SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by
// the valu.
if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID &&
- Src.getID() == AMDGPU::SGPRRegBankID)
+ (Src.getID() == AMDGPU::SGPRRegBankID ||
+ Src.getID() == AMDGPU::VGPRRegBankID ||
+ Src.getID() == AMDGPU::VCCRegBankID))
return std::numeric_limits<unsigned>::max();
return RegisterBankInfo::copyCost(Dst, Src, Size);
@@ -145,7 +148,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&SSMapping);
const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr, // Predicate operand.
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
@@ -153,7 +156,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&SVMapping);
const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr, // Predicate operand.
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
@@ -161,7 +164,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&VSMapping);
const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr, // Predicate operand.
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
@@ -170,6 +173,67 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
return AltMappings;
}
+ case TargetOpcode::G_SELECT: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+ 4); // Num Operands
+ AltMappings.push_back(&SSMapping);
+
+ const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
+ 4); // Num Operands
+ AltMappings.push_back(&VVMapping);
+
+ return AltMappings;
+ }
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_SSUBE: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
+ getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
+ 5); // Num Operands
+ AltMappings.push_back(&SSMapping);
+
+ const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
+ 5); // Num Operands
+ AltMappings.push_back(&VVMapping);
+ return AltMappings;
+ }
+ case AMDGPU::G_BRCOND: {
+ assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
+
+ const InstructionMapping &SMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
+ 2); // Num Operands
+ AltMappings.push_back(&SMapping);
+
+ const InstructionMapping &VMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }),
+ 2); // Num Operands
+ AltMappings.push_back(&VMapping);
+ return AltMappings;
+ }
default:
break;
}
@@ -193,10 +257,16 @@ bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
unsigned Reg = MI.getOperand(i).getReg();
- const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
- if (Bank && Bank->getID() != AMDGPU::SGPRRegBankID)
- return false;
+ if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
+ if (Bank->getID() == AMDGPU::VGPRRegBankID)
+ return false;
+
+ assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
+ Bank->getID() == AMDGPU::SCCRegBankID);
+ }
}
return true;
}
@@ -209,7 +279,8 @@ AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
- OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
+ OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
MI.getNumOperands());
@@ -230,12 +301,32 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
unsigned Reg1 = MI.getOperand(OpdIdx).getReg();
unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);
- unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI);
+
+ unsigned DefaultBankID = Size1 == 1 ?
+ AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
+ unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);
+
OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);
for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
unsigned Size = getSizeInBits(MI.getOperand(OpdIdx).getReg(), MRI, *TRI);
- OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
+ OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
+ }
+
+ return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
+ MI.getNumOperands());
+}
+
+const RegisterBankInfo::InstructionMapping &
+AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
+
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
@@ -304,21 +395,49 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
return getInvalidInstructionMapping();
+
+ case AMDGPU::G_AND:
+ case AMDGPU::G_OR:
+ case AMDGPU::G_XOR: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if (Size == 1) {
+ OpdsMapping[0] = OpdsMapping[1] =
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+
case AMDGPU::G_ADD:
case AMDGPU::G_SUB:
case AMDGPU::G_MUL:
- case AMDGPU::G_AND:
- case AMDGPU::G_OR:
- case AMDGPU::G_XOR:
case AMDGPU::G_SHL:
+ case AMDGPU::G_UADDO:
+ case AMDGPU::G_SADDO:
+ case AMDGPU::G_USUBO:
+ case AMDGPU::G_SSUBO:
+ case AMDGPU::G_UADDE:
+ case AMDGPU::G_SADDE:
+ case AMDGPU::G_USUBE:
+ case AMDGPU::G_SSUBE:
if (isSALUMapping(MI))
return getDefaultMappingSOP(MI);
- // Fall-through
+ LLVM_FALLTHROUGH;
case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
case AMDGPU::G_FPTOSI:
case AMDGPU::G_FPTOUI:
case AMDGPU::G_FMUL:
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_SITOFP:
+ case AMDGPU::G_UITOFP:
+ case AMDGPU::G_FPTRUNC:
+ case AMDGPU::G_FEXP2:
+ case AMDGPU::G_FLOG2:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_INTRINSIC_ROUND:
return getDefaultMappingVOP(MI);
case AMDGPU::G_IMPLICIT_DEF: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -326,11 +445,25 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_FCONSTANT:
- case AMDGPU::G_CONSTANT: {
+ case AMDGPU::G_CONSTANT:
+ case AMDGPU::G_FRAME_INDEX:
+ case AMDGPU::G_BLOCK_ADDR: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
+ case AMDGPU::G_INSERT: {
+ unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
+ AMDGPU::VGPRRegBankID;
+ unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
+ OpdsMapping[3] = nullptr;
+ break;
+ }
case AMDGPU::G_EXTRACT: {
unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
@@ -352,7 +485,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
break;
}
- case AMDGPU::G_BITCAST: {
+ case AMDGPU::G_BITCAST:
+ case AMDGPU::G_INTTOPTR:
+ case AMDGPU::G_PTRTOINT:
+ case AMDGPU::G_CTLZ:
+ case AMDGPU::G_CTLZ_ZERO_UNDEF:
+ case AMDGPU::G_CTTZ:
+ case AMDGPU::G_CTTZ_ZERO_UNDEF:
+ case AMDGPU::G_CTPOP:
+ case AMDGPU::G_BSWAP:
+ case AMDGPU::G_FABS:
+ case AMDGPU::G_FNEG: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
@@ -368,7 +511,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
break;
}
- case AMDGPU::G_ZEXT: {
+ case AMDGPU::G_ZEXT:
+ case AMDGPU::G_SEXT:
+ case AMDGPU::G_ANYEXT: {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Src = MI.getOperand(1).getReg();
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
@@ -391,7 +536,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FCMP: {
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
- OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 1);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
@@ -431,7 +576,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
unsigned Op0Bank = Op2Bank == AMDGPU::SGPRRegBankID &&
Op3Bank == AMDGPU::SGPRRegBankID ?
- AMDGPU::SCCRegBankID : AMDGPU::VGPRRegBankID;
+ AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
@@ -479,6 +624,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
+ case AMDGPU::G_UNMERGE_VALUES: {
+ unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
+ AMDGPU::VGPRRegBankID;
+
+ // Op1 and Dst should use the same register bank.
+ // FIXME: Shouldn't this be the default? Why do we need to handle this?
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
+ OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
+ }
+ break;
+ }
case AMDGPU::G_INTRINSIC: {
switch (MI.getOperand(1).getIntrinsicID()) {
default:
@@ -492,6 +649,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
+ case Intrinsic::amdgcn_wqm_vote: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = OpdsMapping[2]
+ = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
}
break;
}
@@ -528,8 +691,50 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
+ case AMDGPU::G_SELECT: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned Op1Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+ bool SGPRSrcs = Op1Bank == AMDGPU::SCCRegBankID &&
+ Op2Bank == AMDGPU::SGPRRegBankID &&
+ Op3Bank == AMDGPU::SGPRRegBankID;
+ unsigned Bank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
+ Op1Bank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+ OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
+ OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
+ OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
+ break;
+ }
+
case AMDGPU::G_LOAD:
return getInstrMappingForLoad(MI);
+
+ case AMDGPU::G_ATOMICRMW_XCHG:
+ case AMDGPU::G_ATOMICRMW_ADD:
+ case AMDGPU::G_ATOMICRMW_SUB:
+ case AMDGPU::G_ATOMICRMW_AND:
+ case AMDGPU::G_ATOMICRMW_OR:
+ case AMDGPU::G_ATOMICRMW_XOR:
+ case AMDGPU::G_ATOMICRMW_MAX:
+ case AMDGPU::G_ATOMICRMW_MIN:
+ case AMDGPU::G_ATOMICRMW_UMAX:
+ case AMDGPU::G_ATOMICRMW_UMIN:
+ case AMDGPU::G_ATOMIC_CMPXCHG: {
+ return getDefaultMappingAllVGPR(MI);
+ }
+ case AMDGPU::G_BRCOND: {
+ unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
+ if (Bank != AMDGPU::SCCRegBankID)
+ Bank = AMDGPU::VCCRegBankID;
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
+ break;
+ }
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index d48a66589873..d29f4bc79a51 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -49,6 +49,8 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
bool isSALUMapping(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
+ const InstructionMapping &getDefaultMappingAllVGPR(
+ const MachineInstr &MI) const;
public:
AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 7f7f75f65647..570379a820e1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -15,4 +15,7 @@ def VGPRRegBank : RegisterBank<"VGPR",
[VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
>;
-def SCCRegBank : RegisterBank <"SCC", [SCC_CLASS ]>;
+def SCCRegBank : RegisterBank <"SCC", [SCC_CLASS]>;
+
+// It is helpful to distinguish conditions from ordinary SGPRs.
+def VCCRegBank : RegisterBank <"VCC", [SReg_64]>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index 07de5fc549e2..922d974f2ebd 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -27,8 +27,6 @@ class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
AMDGPURegisterInfo();
- bool enableMultipleCopyHints() const override { return true; }
-
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
static unsigned getSubRegFromChannel(unsigned Channel);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index a861762a8c9e..efe501cb73c2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -163,7 +163,7 @@ bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {
// some casts between structs and non-structs, but we can't bitcast
// directly between them. Blender uses
// some casts that look like { <3 x float> }* to <4 x float>*
- if ((SrcEltTy->isStructTy() && (SrcEltTy->getNumContainedTypes() != 1)))
+ if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
return false;
// Clang emits OpenCL 3-vector type accesses with a bitcast to the
@@ -401,8 +401,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (Val->getType() != EltTy) {
Type *EffectiveEltTy = EltTy;
if (StructType *CT = dyn_cast<StructType>(EltTy)) {
- assert(CT->getNumContainedTypes() == 1);
- EffectiveEltTy = CT->getContainedType(0);
+ assert(CT->getNumElements() == 1);
+ EffectiveEltTy = CT->getElementType(0);
}
if (DL->getTypeSizeInBits(EffectiveEltTy) !=
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 98b49070fa99..ed0cc70c3d9a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -74,6 +74,9 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
// We want to be able to turn these off, but making this a subtarget feature
// for SI has the unhelpful behavior that it unsets everything else if you
// disable it.
+ //
+ // Similarly, we want enable-prt-strict-null to be on by default and not to
+ // unset everything else if it is disabled.
SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");
@@ -89,6 +92,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += "-fp32-denormals,";
}
+ FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
+
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
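Feature strings are applied left to right and later entries override earlier ones, which is why the strict-null default is prepended before the user-supplied FS is appended. A hedged illustration of how a disable in FS wins, using a hypothetical user feature string (not code from this patch; it assumes the surrounding initializeSubtargetDependencies() context):

    SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");
    FullFS += "+enable-prt-strict-null,";  // default, prepended
    FullFS += "-enable-prt-strict-null";   // hypothetical user FS, appended last
    ParseSubtargetFeatures(GPU, FullFS);   // the later "-" entry wins: disabled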
@@ -124,10 +129,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
return *this;
}
-AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
- const FeatureBitset &FeatureBits) :
+AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
TargetTriple(TT),
- SubtargetFeatureBits(FeatureBits),
Has16BitInsts(false),
HasMadMixInsts(false),
FP32Denormals(false),
@@ -136,19 +139,22 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
HasVOP3PInsts(false),
HasMulI24(true),
HasMulU24(true),
+ HasInv2PiInlineImm(false),
HasFminFmaxLegacy(true),
EnablePromoteAlloca(false),
+ HasTrigReducedRange(false),
LocalMemorySize(0),
WavefrontSize(0)
{ }
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const GCNTargetMachine &TM) :
+ const GCNTargetMachine &TM) :
AMDGPUGenSubtargetInfo(TT, GPU, FS),
- AMDGPUSubtarget(TT, getFeatureBits()),
+ AMDGPUSubtarget(TT),
TargetTriple(TT),
Gen(SOUTHERN_ISLANDS),
IsaVersion(ISAVersion0_0_0),
+ InstrItins(getInstrItineraryForCPU(GPU)),
LDSBankCount(0),
MaxPrivateElementSize(0),
@@ -170,16 +176,17 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
DebuggerEmitPrologue(false),
EnableHugePrivateBuffer(false),
- EnableVGPRSpilling(false),
EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false),
EnableSIScheduler(false),
EnableDS128(false),
+ EnablePRTStrictNull(false),
DumpCode(false),
FP64(false),
GCN3Encoding(false),
CIInsts(false),
+ VIInsts(false),
GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
@@ -189,15 +196,16 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasVGPRIndexMode(false),
HasScalarStores(false),
HasScalarAtomics(false),
- HasInv2PiInlineImm(false),
HasSDWAOmod(false),
HasSDWAScalar(false),
HasSDWASdst(false),
HasSDWAMac(false),
HasSDWAOutModsVOPC(false),
HasDPP(false),
+ HasR128A16(false),
HasDLInsts(false),
- D16PreservesUnusedBits(false),
+ HasDotInsts(false),
+ EnableSRAMECC(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),
@@ -211,7 +219,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
TLInfo(TM, *this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
- AS = AMDGPU::getAMDGPUAS(TT);
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
@@ -447,7 +454,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
R600GenSubtargetInfo(TT, GPU, FS),
- AMDGPUSubtarget(TT, getFeatureBits()),
+ AMDGPUSubtarget(TT),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
FMA(false),
@@ -460,8 +467,7 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
TexVTXClauseSize(0),
Gen(R600),
TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
- InstrItins(getInstrItineraryForCPU(GPU)),
- AS (AMDGPU::getAMDGPUAS(TT)) { }
+ InstrItins(getInstrItineraryForCPU(GPU)) { }
void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const {
@@ -480,10 +486,6 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.ShouldTrackLaneMasks = true;
}
-bool GCNSubtarget::isVGPRSpillingEnabled(const Function& F) const {
- return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
-}
-
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 623109733651..5584759e5580 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -63,7 +63,6 @@ private:
Triple TargetTriple;
protected:
- const FeatureBitset &SubtargetFeatureBits;
bool Has16BitInsts;
bool HasMadMixInsts;
bool FP32Denormals;
@@ -72,13 +71,15 @@ protected:
bool HasVOP3PInsts;
bool HasMulI24;
bool HasMulU24;
+ bool HasInv2PiInlineImm;
bool HasFminFmaxLegacy;
bool EnablePromoteAlloca;
+ bool HasTrigReducedRange;
int LocalMemorySize;
unsigned WavefrontSize;
public:
- AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+ AMDGPUSubtarget(const Triple &TT);
static const AMDGPUSubtarget &get(const MachineFunction &MF);
static const AMDGPUSubtarget &get(const TargetMachine &TM,
@@ -134,7 +135,7 @@ public:
return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
}
- bool isAmdCodeObjectV2(const Function &F) const {
+ bool isAmdHsaOrMesa(const Function &F) const {
return isAmdHsaOS() || isMesaKernel(F);
}
@@ -170,10 +171,18 @@ public:
return HasMulU24;
}
+ bool hasInv2PiInlineImm() const {
+ return HasInv2PiInlineImm;
+ }
+
bool hasFminFmaxLegacy() const {
return HasFminFmaxLegacy;
}
+ bool hasTrigReducedRange() const {
+ return HasTrigReducedRange;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@@ -193,38 +202,26 @@ public:
/// Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const Function &F) const {
- return isAmdCodeObjectV2(F) ? 0 : 36;
+ return isAmdHsaOrMesa(F) ? 0 : 36;
}
/// \returns Maximum number of work groups per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
- FlatWorkGroupSize);
- }
+ virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
- }
+ virtual unsigned getMinFlatWorkGroupSize() const = 0;
/// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
- }
+ virtual unsigned getMaxFlatWorkGroupSize() const = 0;
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
- FlatWorkGroupSize);
- }
+ virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
- unsigned getMinWavesPerEU() const {
- return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
- }
+ virtual unsigned getMinWavesPerEU() const = 0;
unsigned getMaxWavesPerEU() const { return 10; }
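With these accessors now pure virtual on the common base class, generic code queries them through AMDGPUSubtarget, and the GCN and R600 subclasses forward to AMDGPU::IsaInfo with this (see the overrides added further down in this header). A brief usage sketch under that assumption:

    // Generic (non-GCN-specific) code can stay on the common interface; MF is
    // an available MachineFunction.
    const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
    unsigned MinWG  = ST.getMinFlatWorkGroupSize();
    unsigned Waves  = ST.getMaxWavesPerEU(ST.getMaxFlatWorkGroupSize());
    unsigned Groups = ST.getMaxWorkGroupsPerCU(/*FlatWorkGroupSize=*/256);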
@@ -266,6 +263,7 @@ public:
ISAVersion9_0_2,
ISAVersion9_0_4,
ISAVersion9_0_6,
+ ISAVersion9_0_9,
};
enum TrapHandlerAbi {
@@ -300,6 +298,7 @@ protected:
Triple TargetTriple;
unsigned Gen;
unsigned IsaVersion;
+ InstrItineraryData InstrItins;
int LDSBankCount;
unsigned MaxPrivateElementSize;
@@ -323,11 +322,11 @@ protected:
// Used as options.
bool EnableHugePrivateBuffer;
- bool EnableVGPRSpilling;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler;
bool EnableDS128;
+ bool EnablePRTStrictNull;
bool DumpCode;
// Subtarget statically properties set by tablegen
@@ -337,6 +336,7 @@ protected:
bool IsGCN;
bool GCN3Encoding;
bool CIInsts;
+ bool VIInsts;
bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
@@ -346,15 +346,16 @@ protected:
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasScalarAtomics;
- bool HasInv2PiInlineImm;
bool HasSDWAOmod;
bool HasSDWAScalar;
bool HasSDWASdst;
bool HasSDWAMac;
bool HasSDWAOutModsVOPC;
bool HasDPP;
+ bool HasR128A16;
bool HasDLInsts;
- bool D16PreservesUnusedBits;
+ bool HasDotInsts;
+ bool EnableSRAMECC;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
@@ -372,7 +373,6 @@ protected:
bool FeatureDisable;
SelectionDAGTargetInfo TSInfo;
- AMDGPUAS AS;
private:
SIInstrInfo InstrInfo;
SITargetLowering TLInfo;
@@ -423,6 +423,10 @@ public:
return &TSInfo;
}
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
Generation getGeneration() const {
@@ -441,10 +445,6 @@ public:
return MaxPrivateElementSize;
}
- AMDGPUAS getAMDGPUAS() const {
- return AS;
- }
-
bool hasIntClamp() const {
return HasIntClamp;
}
@@ -517,6 +517,10 @@ public:
return FMA;
}
+ bool hasSwap() const {
+ return GFX9Insts;
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
@@ -574,12 +578,19 @@ public:
return getGeneration() < AMDGPUSubtarget::GFX9;
}
+ /// \returns True if the target requires PRT strict NULL support (zero result
+ /// registers for sparse texture support).
+ bool usePRTStrictNull() const {
+ return EnablePRTStrictNull;
+ }
+
bool hasAutoWaitcntBeforeBarrier() const {
return AutoWaitcntBeforeBarrier;
}
bool hasCodeObjectV3() const {
- return CodeObjectV3;
+ // FIXME: Need to add code object v3 support for mesa and pal.
+ return isAmdHsaOS() ? CodeObjectV3 : false;
}
bool hasUnalignedBufferAccess() const {
@@ -677,8 +688,12 @@ public:
return HasDLInsts;
}
- bool d16PreservesUnusedBits() const {
- return D16PreservesUnusedBits;
+ bool hasDotInsts() const {
+ return HasDotInsts;
+ }
+
+ bool isSRAMECCEnabled() const {
+ return EnableSRAMECC;
}
// Scratch is allocated in 256 dword per wave blocks for the entire
@@ -707,20 +722,19 @@ public:
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getEUsPerCU(this);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
- FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
}
/// \returns Maximum number of waves per execution unit supported by the
@@ -732,8 +746,7 @@ public:
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getWavesPerWorkGroup(
- MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
}
// static wrappers
@@ -747,8 +760,6 @@ public:
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
- bool isVGPRSpillingEnabled(const Function &F) const;
-
unsigned getMaxNumUserSGPRs() const {
return 16;
}
@@ -781,14 +792,15 @@ public:
return HasScalarAtomics;
}
- bool hasInv2PiInlineImm() const {
- return HasInv2PiInlineImm;
- }
bool hasDPP() const {
return HasDPP;
}
+ bool hasR128A16() const {
+ return HasR128A16;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
@@ -817,6 +829,11 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ /// \returns true if the subtarget supports DWORDX3 load/store instructions.
+ bool hasDwordx3LoadStores() const {
+ return CIInsts;
+ }
+
bool hasSMovFedHazard() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
@@ -851,39 +868,34 @@ public:
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getSGPRAllocGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
}
/// \returns SGPR encoding granularity supported by the subtarget.
unsigned getSGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getSGPREncodingGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumSGPRs(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
}
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
- return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU, Addressable);
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
}
/// \returns Reserved number of SGPRs for given function \p MF.
@@ -901,39 +913,34 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
}
/// \returns VGPR encoding granularity supported by the subtarget.
unsigned getVGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getVGPREncodingGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumVGPRs(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -949,6 +956,34 @@ public:
void getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
};
class R600Subtarget final : public R600GenSubtargetInfo,
@@ -968,7 +1003,6 @@ private:
R600TargetLowering TLInfo;
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
- AMDGPUAS AS;
public:
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
@@ -1053,8 +1087,6 @@ public:
short getTexVTXClauseSize() const { return TexVTXClauseSize; }
- AMDGPUAS getAMDGPUAS() const { return AS; }
-
bool enableMachineScheduler() const override {
return true;
}
@@ -1062,6 +1094,34 @@ public:
bool enableSubRegLiveness() const override {
return true;
}
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2205819c444f..e8cefdbf74b9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -45,6 +45,7 @@
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
#include <memory>
@@ -105,6 +106,11 @@ static cl::opt<bool> EnableSDWAPeephole(
cl::desc("Enable SDWA peepholer"),
cl::init(true));
+static cl::opt<bool> EnableDPPCombine(
+ "amdgpu-dpp-combine",
+ cl::desc("Enable DPP combiner"),
+ cl::init(false));
+
// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
cl::desc("Enable AMDGPU Alias Analysis"),
@@ -137,6 +143,20 @@ static cl::opt<bool> EnableLowerKernelArguments(
cl::init(true),
cl::Hidden);
+// Enable atomic optimization
+static cl::opt<bool> EnableAtomicOptimizations(
+ "amdgpu-atomic-optimizations",
+ cl::desc("Enable atomic optimizations"),
+ cl::init(false),
+ cl::Hidden);
+
+// Enable Mode register optimization
+static cl::opt<bool> EnableSIModeRegisterPass(
+ "amdgpu-mode-register",
+ cl::desc("Enable mode register pass"),
+ cl::init(true),
+ cl::Hidden);
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -150,18 +170,22 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeR600VectorRegMergerPass(*PR);
initializeGlobalISel(*PR);
initializeAMDGPUDAGToDAGISelPass(*PR);
+ initializeGCNDPPCombinePass(*PR);
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
+ initializeSIFixupVectorISelPass(*PR);
initializeSIFoldOperandsPass(*PR);
initializeSIPeepholeSDWAPass(*PR);
initializeSIShrinkInstructionsPass(*PR);
initializeSIOptimizeExecMaskingPreRAPass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
+ initializeAMDGPUFixFunctionBitcastsPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
initializeAMDGPUArgumentUsageInfoPass(*PR);
+ initializeAMDGPUAtomicOptimizerPass(*PR);
initializeAMDGPULowerKernelArgumentsPass(*PR);
initializeAMDGPULowerKernelAttributesPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
@@ -172,6 +196,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
+ initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
initializeSIInsertSkipsPass(*PR);
@@ -182,6 +207,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSIFormMemoryClausesPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
+ initializeAMDGPUExternalAAWrapperPass(*PR);
initializeAMDGPUUseNativeCallsPass(*PR);
initializeAMDGPUSimplifyLibCallsPass(*PR);
initializeAMDGPUInlinerPass(*PR);
@@ -292,12 +318,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return Reloc::PIC_;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
TargetOptions Options,
@@ -306,9 +326,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OptLevel)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
FS, Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), OptLevel),
+ getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
TLOF(createTLOF(getTargetTriple())) {
- AS = AMDGPU::getAMDGPUAS(TT);
initAsmInfo();
}
@@ -331,13 +350,6 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
FSAttr.getValueAsString();
}
-static ImmutablePass *createAMDGPUExternalAAWrapperPass() {
- return createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
- if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
- AAR.addAAResult(WrapperPass->getResult());
- });
-}
-
/// Predicate for Internalize pass.
static bool mustPreserveGV(const GlobalValue &GV) {
if (const Function *F = dyn_cast<Function>(&GV))
@@ -360,17 +372,6 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.Inliner = createAMDGPUFunctionInliningPass();
}
- if (Internalize) {
- // If we're generating code, we always have the whole program available. The
- // relocations expected for externally visible functions aren't supported,
- // so make sure every non-entry function is hidden.
- Builder.addExtension(
- PassManagerBuilder::EP_EnabledOnOptLevel0,
- [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- PM.add(createInternalizePass(mustPreserveGV));
- });
- }
-
Builder.addExtension(
PassManagerBuilder::EP_ModuleOptimizerEarly,
[Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
@@ -613,20 +614,23 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
+ addPass(createAtomicExpandPass());
+
+ // This must occur before inlining, as the inliner will not look through
+ // bitcast calls.
+ addPass(createAMDGPUFixFunctionBitcastsPass());
+
addPass(createAMDGPULowerIntrinsicsPass());
- if (TM.getTargetTriple().getArch() == Triple::r600 ||
- !EnableAMDGPUFunctionCalls) {
- // Function calls are not supported, so make sure we inline everything.
- addPass(createAMDGPUAlwaysInlinePass());
- addPass(createAlwaysInlinerLegacyPass());
- // We need to add the barrier noop pass, otherwise adding the function
- // inlining pass will cause all of the PassConfigs passes to be run
- // one function at a time, which means if we have a nodule with two
- // functions, then we will generate code for the first function
- // without ever running any passes on the second.
- addPass(createBarrierNoopPass());
- }
+ // Function calls are not supported, so make sure we inline everything.
+ addPass(createAMDGPUAlwaysInlinePass());
+ addPass(createAlwaysInlinerLegacyPass());
+ // We need to add the barrier noop pass, otherwise adding the function
+ // inlining pass will cause all of the PassConfigs passes to be run
+ // one function at a time, which means if we have a module with two
+ // functions, then we will generate code for the first function
+ // without ever running any passes on the second.
+ addPass(createBarrierNoopPass());
if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
// TODO: May want to move later or split into an early and late one.
@@ -690,6 +694,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
}
bool AMDGPUPassConfig::addPreISel() {
+ addPass(createLowerSwitchPass());
addPass(createFlattenCFGPass());
return false;
}
@@ -759,6 +764,10 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
+ if (EnableAtomicOptimizations) {
+ addPass(createAMDGPUAtomicOptimizerPass());
+ }
+
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
addPass(createAMDGPUAnnotateKernelFeaturesPass());
@@ -789,6 +798,8 @@ void GCNPassConfig::addMachineSSAOptimization() {
//
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsID);
+ if (EnableDPPCombine)
+ addPass(&GCNDPPCombineID);
addPass(&DeadMachineInstructionElimID);
addPass(&SILoadStoreOptimizerID);
if (EnableSDWAPeephole) {
@@ -811,8 +822,10 @@ bool GCNPassConfig::addILPOpts() {
bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
- addPass(createSILowerI1CopiesPass());
addPass(&SIFixSGPRCopiesID);
+ addPass(createSILowerI1CopiesPass());
+ addPass(createSIFixupVectorISelPass());
+ addPass(createSIAddIMGInitPass());
return false;
}
@@ -878,7 +891,8 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
void GCNPassConfig::addPostRegAlloc() {
addPass(&SIFixVGPRCopiesID);
- addPass(&SIOptimizeExecMaskingID);
+ if (getOptLevel() > CodeGenOpt::None)
+ addPass(&SIOptimizeExecMaskingID);
TargetPassConfig::addPostRegAlloc();
}
@@ -889,6 +903,7 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createSIMemoryLegalizerPass());
addPass(createSIInsertWaitcntsPass());
addPass(createSIShrinkInstructionsPass());
+ addPass(createSIModeRegisterPass());
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able to handle all hazards correctly. This is because if there
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 0fe14493fabd..62fbe71d1902 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -34,7 +34,6 @@ namespace llvm {
class AMDGPUTargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AMDGPUAS AS;
StringRef getGPUName(const Function &F) const;
StringRef getFeatureString(const Function &F) const;
@@ -55,16 +54,13 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
- AMDGPUAS getAMDGPUAS() const {
- return AS;
- }
void adjustPassManager(PassManagerBuilder &) override;
+
/// Get the integer value of a null pointer in the given address space.
uint64_t getNullPointerValue(unsigned AddrSpace) const {
- if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
- return -1;
- return 0;
+ return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
}
};
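A quick worked check of the null-pointer encoding above: only the local (LDS) and region address spaces use an all-ones null value; every other space keeps zero. Assuming a constructed AMDGPUTargetMachine TM:

    uint64_t LDSNull  = TM.getNullPointerValue(AMDGPUAS::LOCAL_ADDRESS); // ~0ULL, i.e. -1
    uint64_t FlatNull = TM.getNullPointerValue(AMDGPUAS::FLAT_ADDRESS);  // 0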
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index e2f718bd3c34..c4e1efde130b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -29,3 +29,13 @@ MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(
return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
}
+
+MCSection *AMDGPUTargetObjectFile::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const {
+ // Set metadata access for the explicit section
+ StringRef SectionName = GO->getSection();
+ if (SectionName.startswith(".AMDGPU.comment."))
+ SK = SectionKind::getMetadata();
+
+ return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM);
+}
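The new override matters for comment-style globals that front ends or tools place in explicit .AMDGPU.comment.* sections; classifying them as metadata keeps them out of allocated data sections. A small hedged sketch of such a global (assumed Module M and LLVMContext Ctx, illustrative only):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"

    // A string global routed to an .AMDGPU.comment.* section; with the override
    // above it is emitted as SectionKind::getMetadata() rather than as data.
    llvm::Constant *Str = llvm::ConstantDataArray::getString(Ctx, "compiler note");
    auto *GV = new llvm::GlobalVariable(M, Str->getType(), /*isConstant=*/true,
                                        llvm::GlobalValue::InternalLinkage, Str,
                                        "amdgpu.note");
    GV->setSection(".AMDGPU.comment.note");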
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
index dd9dc1a88fc2..a4ae1a2c18c2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -26,6 +26,8 @@ class AMDGPUTargetObjectFile : public TargetLoweringObjectFileELF {
public:
MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index a68b8d03f06e..11e4ba4b5010 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -102,7 +102,6 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned ThresholdPrivate = UnrollThresholdPrivate;
unsigned ThresholdLocal = UnrollThresholdLocal;
unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
- const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple);
for (const BasicBlock *BB : L->getBlocks()) {
const DataLayout &DL = BB->getModule()->getDataLayout();
unsigned LocalGEPsSeen = 0;
@@ -140,9 +139,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned AS = GEP->getAddressSpace();
unsigned Threshold = 0;
- if (AS == ASST.PRIVATE_ADDRESS)
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS)
Threshold = ThresholdPrivate;
- else if (AS == ASST.LOCAL_ADDRESS)
+ else if (AS == AMDGPUAS::LOCAL_ADDRESS)
Threshold = ThresholdLocal;
else
continue;
@@ -150,7 +149,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (UP.Threshold >= Threshold)
continue;
- if (AS == ASST.PRIVATE_ADDRESS) {
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
const Value *Ptr = GEP->getPointerOperand();
const AllocaInst *Alloca =
dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
@@ -160,7 +159,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
if (AllocaSize > MaxAlloca)
continue;
- } else if (AS == ASST.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
LocalGEPsSeen++;
// Inhibit unroll for local memory if we have seen addressing not to
// a variable, as most likely we will be unable to combine it.
@@ -253,19 +252,18 @@ unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
}
unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- AMDGPUAS AS = ST->getAMDGPUAS();
- if (AddrSpace == AS.GLOBAL_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS_32BIT) {
+ if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
return 512;
}
- if (AddrSpace == AS.FLAT_ADDRESS ||
- AddrSpace == AS.LOCAL_ADDRESS ||
- AddrSpace == AS.REGION_ADDRESS)
+ if (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
+ AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)
return 128;
- if (AddrSpace == AS.PRIVATE_ADDRESS)
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
return 8 * ST->getMaxPrivateElementSize();
llvm_unreachable("unhandled address space");
@@ -277,7 +275,7 @@ bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
// We allow vectorization of flat stores, even though we may need to decompose
// them later if they may access private memory. We don't have enough context
// here, and legalization can handle it.
- if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) {
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
ChainSizeInBytes <= ST->getMaxPrivateElementSize();
}
@@ -545,14 +543,15 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
if (const Argument *A = dyn_cast<Argument>(V))
return !isArgPassedInSGPR(A);
- // Loads from the private address space are divergent, because threads
- // can execute the load instruction with the same inputs and get different
- // results.
+ // Loads from the private and flat address spaces are divergent, because
+ // threads can execute the load instruction with the same inputs and get
+ // different results.
//
// All other loads are not divergent, because if threads issue loads with the
// same arguments, they will always get the same result.
if (const LoadInst *Load = dyn_cast<LoadInst>(V))
- return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS;
+ return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
+ Load->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
// Atomics are divergent because they are executed sequentially: when an
// atomic operation refers to the same address in each thread, then each
@@ -642,20 +641,19 @@ unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
}
unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- AMDGPUAS AS = ST->getAMDGPUAS();
- if (AddrSpace == AS.GLOBAL_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS)
+ if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
return 128;
- if (AddrSpace == AS.LOCAL_ADDRESS ||
- AddrSpace == AS.REGION_ADDRESS)
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)
return 64;
- if (AddrSpace == AS.PRIVATE_ADDRESS)
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
return 32;
- if ((AddrSpace == AS.PARAM_D_ADDRESS ||
- AddrSpace == AS.PARAM_I_ADDRESS ||
- (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
- AddrSpace <= AS.CONSTANT_BUFFER_15)))
+ if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
+ AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
+ (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+ AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
return 128;
llvm_unreachable("unhandled address space");
}
@@ -666,9 +664,7 @@ bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
// We allow vectorization of flat stores, even though we may need to decompose
// them later if they may access private memory. We don't have enough context
// here, and legalization can handle it.
- if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS)
- return false;
- return true;
+ return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
}
bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 8e63d789e17d..397c5c6fa6fb 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -179,7 +179,7 @@ public:
if (IsGraphicsShader)
return -1;
return ST->hasFlatAddressSpace() ?
- ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
+ AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
}
unsigned getVectorSplitCost() { return 0; }
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 0d3a1673696a..ced3f6f567e2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -25,7 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -70,7 +70,7 @@ char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
@@ -78,10 +78,10 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
// TODO: Preserve dominator tree.
AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
// No divergent values are changed, only blocks and branch edges.
- AU.addPreserved<DivergenceAnalysis>();
+ AU.addPreserved<LegacyDivergenceAnalysis>();
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
@@ -95,7 +95,7 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
/// \returns true if \p BB is reachable through only uniform branches.
/// XXX - Is there a more efficient way to find this?
-static bool isUniformlyReached(const DivergenceAnalysis &DA,
+static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA,
BasicBlock &BB) {
SmallVector<BasicBlock *, 8> Stack;
SmallPtrSet<BasicBlock *, 8> Visited;
@@ -163,7 +163,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
if (PDT.getRoots().size() <= 1)
return false;
- DivergenceAnalysis &DA = getAnalysis<DivergenceAnalysis>();
+ LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
// Loop over all of the blocks in a function, tracking all of the blocks that
// return.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 31e2885c833d..3f9af27a2e5e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -49,6 +49,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -156,13 +157,12 @@ public:
ImmTyDMask,
ImmTyUNorm,
ImmTyDA,
- ImmTyR128,
+ ImmTyR128A16,
ImmTyLWE,
ImmTyExpTgt,
ImmTyExpCompr,
ImmTyExpVM,
- ImmTyDFMT,
- ImmTyNFMT,
+ ImmTyFORMAT,
ImmTyHwreg,
ImmTyOff,
ImmTySendMsg,
@@ -291,7 +291,7 @@ public:
bool isDMask() const { return isImmTy(ImmTyDMask); }
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }
- bool isR128() const { return isImmTy(ImmTyR128); }
+ bool isR128A16() const { return isImmTy(ImmTyR128A16); }
bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -312,8 +312,7 @@ public:
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isD16() const { return isImmTy(ImmTyD16); }
- bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
- bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
+ bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
@@ -666,8 +665,7 @@ public:
case ImmTySLC: OS << "SLC"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
- case ImmTyDFMT: OS << "DFMT"; break;
- case ImmTyNFMT: OS << "NFMT"; break;
+ case ImmTyFORMAT: OS << "FORMAT"; break;
case ImmTyClampSI: OS << "ClampSI"; break;
case ImmTyOModSI: OS << "OModSI"; break;
case ImmTyDppCtrl: OS << "DppCtrl"; break;
@@ -681,7 +679,7 @@ public:
case ImmTyDMask: OS << "DMask"; break;
case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;
- case ImmTyR128: OS << "R128"; break;
+ case ImmTyR128A16: OS << "R128A16"; break;
case ImmTyLWE: OS << "LWE"; break;
case ImmTyOff: OS << "Off"; break;
case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -920,8 +918,7 @@ public:
// Currently there is no suitable machinery in the core llvm-mc for this.
// MCSymbol::isRedefinable is intended for another purpose, and
// AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
MCSymbol *Sym =
@@ -1061,6 +1058,7 @@ public:
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
+ OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
@@ -1092,7 +1090,6 @@ private:
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
- bool validateMIMGR128(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -1829,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
unsigned RegWidth) {
// Symbols are only defined for GCN targets
- if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
+ if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
return true;
auto SymbolName = getGprCountSymbolName(RegKind);
@@ -2447,22 +2444,6 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
-bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
-
- const unsigned Opc = Inst.getOpcode();
- const MCInstrDesc &Desc = MII.get(Opc);
-
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
- return true;
-
- int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
- assert(Idx != -1);
-
- bool R128 = (Inst.getOperand(Idx).getImm() != 0);
-
- return !R128 || hasMIMG_R128();
-}
-
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -2497,11 +2478,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"integer clamping is not supported on this GPU");
return false;
}
- if (!validateMIMGR128(Inst)) {
- Error(IDLoc,
- "r128 modifier is not supported on this GPU");
- return false;
- }
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {
Error(IDLoc,
@@ -2661,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
unsigned &SGPRBlocks) {
// TODO(scott.linder): These calculations are duplicated from
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
- IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(getSTI().getCPU());
unsigned NumVGPRs = NextFreeVGPR;
unsigned NumSGPRs = NextFreeSGPR;
- unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
+ unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
NumSGPRs > MaxAddressableNumSGPRs)
return OutOfRangeError(SGPRRange);
NumSGPRs +=
- IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
+ IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
NumSGPRs > MaxAddressableNumSGPRs)
@@ -2681,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
if (Features.test(FeatureSGPRInitBug))
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
- VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
- SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
+ VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
+ SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
return false;
}
@@ -2702,8 +2678,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
StringSet<> Seen;
- IsaInfo::IsaVersion IVersion =
- IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+ IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
SMRange VGPRRange;
uint64_t NextFreeVGPR = 0;
@@ -2962,8 +2937,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
ISA.Stepping,
"AMD", "AMDGPU");
@@ -3025,7 +2999,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -3091,9 +3065,18 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
+ const char *AssemblerDirectiveBegin;
+ const char *AssemblerDirectiveEnd;
+ std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
+ AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
+ ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
+ HSAMD::V3::AssemblerDirectiveEnd)
+ : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
+ HSAMD::AssemblerDirectiveEnd);
+
if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
return Error(getParser().getTok().getLoc(),
- (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
+ (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
"not available on non-amdhsa OSes")).str());
}
@@ -3111,7 +3094,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
if (getLexer().is(AsmToken::Identifier)) {
StringRef ID = getLexer().getTok().getIdentifier();
- if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
+ if (ID == AssemblerDirectiveEnd) {
Lex();
FoundEnd = true;
break;
@@ -3133,8 +3116,13 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
YamlStream.flush();
- if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
- return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ if (IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
+ return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ } else {
+ if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
+ return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ }
return false;
}
@@ -3171,6 +3159,10 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();
+
+ // TODO: Restructure/combine with PAL metadata directive.
+ if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
+ return ParseDirectiveHSAMetadata();
} else {
if (IDVal == ".hsa_code_object_version")
return ParseDirectiveHSACodeObjectVersion();
@@ -3186,10 +3178,10 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_amdgpu_isa")
return ParseDirectiveISAVersion();
- }
- if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
- return ParseDirectiveHSAMetadata();
+ if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
+ return ParseDirectiveHSAMetadata();
+ }
if (IDVal == PALMD::AssemblerDirective)
return ParseDirectivePALMetadata();
@@ -3465,6 +3457,10 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
case AsmToken::Identifier: {
StringRef Tok = Parser.getTok().getString();
if (Tok == Name) {
+ if (Tok == "r128" && isGFX9())
+ Error(S, "r128 modifier is not supported on this GPU");
+ if (Tok == "a16" && !isGFX9())
+ Error(S, "a16 modifier is not supported on this GPU");
Bit = 1;
Parser.Lex();
} else if (Tok.startswith("no") && Tok.endswith(Name)) {
@@ -3522,6 +3518,53 @@ AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
return MatchOperand_Success;
}
+// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
+// values to live in a joint format operand in the MCInst encoding.
+OperandMatchResultTy
+AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t Dfmt = 0, Nfmt = 0;
+ // dfmt and nfmt can appear in either order, and each is optional.
+ bool GotDfmt = false, GotNfmt = false;
+ while (!GotDfmt || !GotNfmt) {
+ if (!GotDfmt) {
+ auto Res = parseIntWithPrefix("dfmt", Dfmt);
+ if (Res != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Res;
+ if (Dfmt >= 16) {
+ Error(Parser.getTok().getLoc(), "out of range dfmt");
+ return MatchOperand_ParseFail;
+ }
+ GotDfmt = true;
+ Parser.Lex();
+ continue;
+ }
+ }
+ if (!GotNfmt) {
+ auto Res = parseIntWithPrefix("nfmt", Nfmt);
+ if (Res != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Res;
+ if (Nfmt >= 8) {
+ Error(Parser.getTok().getLoc(), "out of range nfmt");
+ return MatchOperand_ParseFail;
+ }
+ GotNfmt = true;
+ Parser.Lex();
+ continue;
+ }
+ }
+ break;
+ }
+ if (!GotDfmt && !GotNfmt)
+ return MatchOperand_NoMatch;
+ auto Format = Dfmt | Nfmt << 4;
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
+ return MatchOperand_Success;
+}
+
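Aside, for orientation: parseDfmtNfmt above folds the two tbuffer sub-fields into a
single 7-bit immediate, with dfmt in bits 3-0 and nfmt in bits 6-4 (the same layout
the MTBUF_Real encodings later slice back out as format{3-0} and format{6-4}). A
minimal C++ sketch of that packing; packTbufFormat/unpackTbufFormat are illustrative
names, not part of the patch:

#include <cassert>
#include <cstdint>

// Pack a 4-bit dfmt and a 3-bit nfmt into the joint 7-bit format immediate.
static uint8_t packTbufFormat(uint8_t Dfmt, uint8_t Nfmt) {
  assert(Dfmt < 16 && "out of range dfmt");
  assert(Nfmt < 8 && "out of range nfmt");
  return uint8_t(Dfmt | (Nfmt << 4));
}

// Recover the sub-fields, mirroring format{3-0} and format{6-4}.
static void unpackTbufFormat(uint8_t Format, uint8_t &Dfmt, uint8_t &Nfmt) {
  Dfmt = Format & 0xF;
  Nfmt = (Format >> 4) & 0x7;
}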
//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//
@@ -3652,12 +3695,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
static bool
encodeCnt(
- const AMDGPU::IsaInfo::IsaVersion ISA,
+ const AMDGPU::IsaVersion ISA,
int64_t &IntVal,
int64_t CntVal,
bool Saturate,
- unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
- unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
+ unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
+ unsigned (*decode)(const IsaVersion &Version, unsigned))
{
bool Failed = false;
@@ -3688,8 +3731,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getParser().parseAbsoluteExpression(CntVal))
return true;
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
bool Failed = true;
bool Sat = CntName.endswith("_sat");
@@ -3724,8 +3766,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
int64_t Waitcnt = getWaitcntBitMask(ISA);
SMLoc S = Parser.getTok().getLoc();
@@ -4617,8 +4658,7 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOffset);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
@@ -4661,7 +4701,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
@@ -4761,8 +4801,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
- {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
- {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
+ {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
@@ -4772,7 +4811,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
- {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
+ {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
+ {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
@@ -4844,6 +4884,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
Op.Type == AMDGPUOperand::ImmTyNegHi) {
res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
Op.ConvertResult);
+ } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
+ res = parseDfmtNfmt(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@@ -5251,12 +5293,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
- // All DPP instructions with at least one source operand have a fake "old"
- // source at the beginning that's tied to the dst operand. Handle it here.
- if (Desc.getNumOperands() >= 2)
- Inst.addOperand(Inst.getOperand(0));
-
for (unsigned E = Operands.size(); I != E; ++I) {
+ auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
+ MCOI::TIED_TO);
+ if (TiedTo != -1) {
+ assert((unsigned)TiedTo < Inst.getNumOperands());
+ // handle tied old or src2 for MAC instructions
+ Inst.addOperand(Inst.getOperand(TiedTo));
+ }
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td b/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
index b87c47a6b9ee..51c2abeac2ff 100644
--- a/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -17,14 +17,12 @@ def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [],
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
-def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
-def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
class MubufLoad <SDPatternOperator op> : PatFrag <
(ops node:$ptr), (op node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS;
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
def mubuf_load : MubufLoad <load>;
@@ -100,15 +98,11 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> has_vaddr = 1;
bits<1> has_glc = 1;
bits<1> glc_value = 0; // the value for glc if no such operand
- bits<4> dfmt_value = 1; // the value for dfmt if no such operand
- bits<3> nfmt_value = 0; // the value for nfmt if no such operand
bits<1> has_srsrc = 1;
bits<1> has_soffset = 1;
bits<1> has_offset = 1;
bits<1> has_slc = 1;
bits<1> has_tfe = 1;
- bits<1> has_dfmt = 1;
- bits<1> has_nfmt = 1;
}
class MTBUF_Real <MTBUF_Pseudo ps> :
@@ -126,14 +120,16 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
bits<12> offset;
bits<1> glc;
- bits<4> dfmt;
- bits<3> nfmt;
+ bits<7> format;
bits<8> vaddr;
bits<8> vdata;
bits<7> srsrc;
bits<1> slc;
bits<1> tfe;
bits<8> soffset;
+
+ bits<4> dfmt = format{3-0};
+ bits<3> nfmt = format{6-4};
}
class getMTBUFInsDA<list<RegisterClass> vdataList,
@@ -142,16 +138,16 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, SLC:$slc, TFE:$tfe),
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, SLC:$slc, TFE:$tfe)
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe)
);
dag InsData = !if(!empty(vaddrList),
(ins vdataClass:$vdata, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
SLC:$slc, TFE:$tfe),
(ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
SLC:$slc, TFE:$tfe)
);
dag ret = !if(!empty(vdataList), InsNoData, InsData);
@@ -169,15 +165,15 @@ class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
class getMTBUFAsmOps<int addrKind> {
string Pfx =
- !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $dfmt, $nfmt, $soffset",
+ !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $format, $soffset",
!if(!eq(addrKind, BUFAddrKind.OffEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset offen",
+ "$vaddr, $srsrc, $format, $soffset offen",
!if(!eq(addrKind, BUFAddrKind.IdxEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen",
+ "$vaddr, $srsrc, $format, $soffset idxen",
!if(!eq(addrKind, BUFAddrKind.BothEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen offen",
+ "$vaddr, $srsrc, $format, $soffset idxen offen",
!if(!eq(addrKind, BUFAddrKind.Addr64),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset addr64",
+ "$vaddr, $srsrc, $format, $soffset addr64",
"")))));
string ret = Pfx # "$offset";
}
@@ -217,14 +213,14 @@ multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set load_vt:$vdata,
- (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$dfmt,
- i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
+ i1:$glc, i1:$slc, i1:$tfe)))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set load_vt:$vdata,
(ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
- i8:$dfmt, i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>,
MTBUFAddr64Table<1, NAME>;
def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -263,13 +259,13 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i16:$offset, i8:$format, i1:$glc,
i1:$slc, i1:$tfe))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i16:$offset, i8:$format, i1:$glc,
i1:$slc, i1:$tfe))]>,
MTBUFAddr64Table<1, NAME>;
@@ -290,6 +286,12 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
// MUBUF classes
//===----------------------------------------------------------------------===//
+class MUBUFGetBaseOpcode<string Op> {
+ string ret = !subst("DWORDX2", "DWORD",
+ !subst("DWORDX3", "DWORD",
+ !subst("DWORDX4", "DWORD", Op)));
+}
+
class MUBUF_Pseudo <string opName, dag outs, dag ins,
string asmOps, list<dag> pattern=[]> :
InstSI<outs, ins, "", pattern>,
@@ -303,6 +305,9 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
string Mnemonic = opName;
string AsmOperands = asmOps;
+ Instruction Opcode = !cast<Instruction>(NAME);
+ Instruction BaseOpcode = !cast<Instruction>(MUBUFGetBaseOpcode<NAME>.ret);
+
let VM_CNT = 1;
let EXP_CNT = 1;
let MUBUF = 1;
@@ -325,6 +330,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> has_offset = 1;
bits<1> has_slc = 1;
bits<1> has_tfe = 1;
+ bits<4> dwords = 0;
}
class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
@@ -398,6 +404,16 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
);
}
+class getMUBUFDwords<RegisterClass regClass> {
+ string regClassAsInt = !cast<string>(regClass);
+ int ret =
+ !if(!eq(regClassAsInt, !cast<string>(VGPR_32)), 1,
+ !if(!eq(regClassAsInt, !cast<string>(VReg_64)), 2,
+ !if(!eq(regClassAsInt, !cast<string>(VReg_96)), 3,
+ !if(!eq(regClassAsInt, !cast<string>(VReg_128)), 4,
+ 0))));
+}
+
class getMUBUFIns<int addrKind, list<RegisterClass> vdataList=[], bit isLds = 0> {
dag ret =
!if(!eq(addrKind, BUFAddrKind.Offset), getMUBUFInsDA<vdataList, [], isLds>.ret,
@@ -458,6 +474,7 @@ class MUBUF_Load_Pseudo <string opName,
let Uses = !if(isLds, [EXEC, M0], [EXEC]);
let has_tfe = !if(isLds, 0, 1);
let lds = isLds;
+ let dwords = getMUBUFDwords<vdataClass>.ret;
}
// FIXME: tfe can't be an operand because it requires a separate
@@ -521,6 +538,7 @@ class MUBUF_Store_Pseudo <string opName,
let mayLoad = 0;
let mayStore = 1;
let maybeAtomic = 1;
+ let dwords = getMUBUFDwords<vdataClass>.ret;
}
multiclass MUBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
@@ -660,11 +678,10 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
let AsmMatchConverter = "cvtMubufAtomicReturn";
}
-multiclass MUBUF_Pseudo_Atomics <string opName,
- RegisterClass vdataClass,
- ValueType vdataType,
- SDPatternOperator atomic> {
-
+multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
+ RegisterClass vdataClass,
+ ValueType vdataType,
+ SDPatternOperator atomic> {
def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
MUBUFAddr64Table <0, NAME>;
def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
@@ -672,7 +689,12 @@ multiclass MUBUF_Pseudo_Atomics <string opName,
def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+}
+multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
+ RegisterClass vdataClass,
+ ValueType vdataType,
+ SDPatternOperator atomic> {
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
@@ -690,6 +712,13 @@ multiclass MUBUF_Pseudo_Atomics <string opName,
def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
}
+multiclass MUBUF_Pseudo_Atomics <string opName,
+ RegisterClass vdataClass,
+ ValueType vdataType,
+ SDPatternOperator atomic> :
+ MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType, atomic>,
+ MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType, atomic>;
+
//===----------------------------------------------------------------------===//
// MUBUF Instructions
@@ -1030,6 +1059,14 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
// MUBUF Patterns
//===----------------------------------------------------------------------===//
+def extract_glc : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
+}]>;
+
+def extract_slc : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
+}]>;
+
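Aside, for orientation: the rewritten buffer intrinsic patterns replace the separate
glc/slc immediates with a single cachepolicy operand, and extract_glc/extract_slc
above read it as bit 0 = glc, bit 1 = slc. A hedged C++ sketch of that bit layout;
the helper names are illustrative, not from the patch:

#include <cstdint>

// cachepolicy layout as implied by the SDNodeXForms above: bit 0 = glc, bit 1 = slc.
static unsigned extractGlcBit(uint64_t CachePolicy) { return CachePolicy & 1; }
static unsigned extractSlcBit(uint64_t CachePolicy) { return (CachePolicy >> 1) & 1; }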
//===----------------------------------------------------------------------===//
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
@@ -1037,119 +1074,129 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (vt (name v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc)),
+ (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc)),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0)),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm)),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc)),
+ (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, i32, "BUFFER_LOAD_FORMAT_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2i32, "BUFFER_LOAD_FORMAT_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4i32, "BUFFER_LOAD_FORMAT_XYZW">;
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i16, "BUFFER_LOAD_FORMAT_D16_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, i32, "BUFFER_LOAD_DWORD">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i32, "BUFFER_LOAD_DWORDX2">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4i32, "BUFFER_LOAD_DWORDX4">;
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc),
+ (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
+ (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
+ (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc),
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, i32, "BUFFER_STORE_FORMAT_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2i32, "BUFFER_STORE_FORMAT_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4i32, "BUFFER_STORE_FORMAT_XYZW">;
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2f16, "BUFFER_STORE_FORMAT_D16_XY">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i16, "BUFFER_STORE_FORMAT_D16_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZW">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i16, "BUFFER_STORE_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, i32, "BUFFER_STORE_DWORD">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i32, "BUFFER_STORE_DWORDX2">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i32, "BUFFER_STORE_DWORDX4">;
//===----------------------------------------------------------------------===//
// buffer_atomic patterns
@@ -1158,36 +1205,36 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
+ (as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
+ (as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
+ (as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
$vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc))
+ $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy))
>;
}
@@ -1205,49 +1252,49 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
@@ -1397,54 +1444,6 @@ defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D
defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, i16, az_extloadi8_private>;
defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, i16, sextloadi8_private>;
}
-
-// BUFFER_LOAD_DWORD*, addr64=0
-multiclass MUBUF_Load_Dword <ValueType vt,
- MUBUF_Pseudo offset,
- MUBUF_Pseudo offen,
- MUBUF_Pseudo idxen,
- MUBUF_Pseudo bothen> {
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
- imm:$offset, 0, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 0, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-}
-
-defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
- BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
-defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
- BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
-defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
- BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
-
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_st> {
// Store follows atomic op convention so address is first
@@ -1524,32 +1523,36 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
@@ -1576,39 +1579,36 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
- imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$offset, imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
@@ -1781,8 +1781,8 @@ class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{15} = ps.addr64;
let Inst{18-16} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -1811,6 +1811,7 @@ defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
//===----------------------------------------------------------------------===//
// CI
+// MTBUF - GFX6, GFX7.
//===----------------------------------------------------------------------===//
class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> :
@@ -2013,8 +2014,8 @@ class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{18-15} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -2043,8 +2044,8 @@ class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{18-15} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -2089,3 +2090,22 @@ let SubtargetPredicate = HasPackedD16VMem in {
defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_vi <0x0e>;
defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_vi <0x0f>;
} // End HasPackedD16VMem.
+
+def MUBUFInfoTable : GenericTable {
+ let FilterClass = "MUBUF_Pseudo";
+ let CppTypeName = "MUBUFInfo";
+ let Fields = ["Opcode", "BaseOpcode", "dwords", "has_vaddr", "has_srsrc", "has_soffset"];
+
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "getMUBUFOpcodeHelper";
+}
+
+def getMUBUFInfoFromOpcode : SearchIndex {
+ let Table = MUBUFInfoTable;
+ let Key = ["Opcode"];
+}
+
+def getMUBUFInfoFromBaseOpcodeAndDwords : SearchIndex {
+ let Table = MUBUFInfoTable;
+ let Key = ["BaseOpcode", "dwords"];
+}
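Aside, for orientation: MUBUFInfoTable is a TableGen GenericTable, so the build emits
one MUBUFInfo record per MUBUF_Pseudo plus lookup helpers named after the SearchIndex
defs above. The sketch below only illustrates the apparent intent (looking up the
sibling opcode of the same base operation with a different dword count); the struct
layout and generated signatures are assumptions and should be checked against the
generated searchable-tables include before use:

#include <cstdint>

// Assumed shape of the generated entries and lookups (names taken from the
// table definition above; exact types are an assumption).
namespace AMDGPU {
struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t dwords;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};
const MUBUFInfo *getMUBUFInfoFromOpcode(uint16_t Opcode);
const MUBUFInfo *getMUBUFInfoFromBaseOpcodeAndDwords(uint16_t BaseOpcode,
                                                     uint8_t dwords);
} // namespace AMDGPU

// Hypothetical use: map an opcode to the variant that moves Dwords dwords.
static int getMUBUFVariantWithDwords(uint16_t Opc, uint8_t Dwords) {
  const AMDGPU::MUBUFInfo *Info = AMDGPU::getMUBUFInfoFromOpcode(Opc);
  if (!Info)
    return -1;
  const AMDGPU::MUBUFInfo *Variant =
      AMDGPU::getMUBUFInfoFromBaseOpcodeAndDwords(Info->BaseOpcode, Dwords);
  return Variant ? int(Variant->Opcode) : -1;
}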
diff --git a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
index cdc6ab9412e6..31d2ebef481d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -728,7 +728,9 @@ class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, PatFrag frag> : GCNPat<
(i1 0))
>;
-let OtherPredicates = [LDSRequiresM0Init] in {
+// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
+// related to bounds checking.
+let OtherPredicates = [LDSRequiresM0Init, isCIVI] in {
def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, load_local_m0>;
def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, store_local_m0>;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 3ef473b7fd96..44040d352e6a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -121,6 +121,11 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
+class GlobalSaddrTable <bit is_saddr, string Name = ""> {
+ bit IsSaddr = is_saddr;
+ string SaddrOp = Name;
+}
+
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).
@@ -171,15 +176,19 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
let is_flat_global = 1 in {
- def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>;
- def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>;
+ def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>,
+ GlobalSaddrTable<1, opName>;
}
}
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
let is_flat_global = 1 in {
- def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>;
- def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>;
+ def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>,
+ GlobalSaddrTable<1, opName>;
}
}
@@ -262,6 +271,7 @@ multiclass FLAT_Atomic_Pseudo<
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
" $vaddr, $vdata$offset$slc">,
+ GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;
}
@@ -272,10 +282,11 @@ multiclass FLAT_Atomic_Pseudo<
" $vdst, $vaddr, $vdata$offset glc$slc",
[(set vt:$vdst,
(atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+ GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1>;
}
-multiclass FLAT_Global_Atomic_Pseudo<
+multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
string opName,
RegisterClass vdst_rc,
ValueType vt,
@@ -287,35 +298,48 @@ multiclass FLAT_Global_Atomic_Pseudo<
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
" $vaddr, $vdata, off$offset$slc">,
+ GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let has_saddr = 1;
let PseudoInstr = NAME;
}
- def _RTN : FLAT_AtomicRet_Pseudo <opName,
- (outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
- " $vdst, $vaddr, $vdata, off$offset glc$slc",
- [(set vt:$vdst,
- (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
- AtomicNoRet <opName, 1> {
- let has_saddr = 1;
- }
-
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
" $vaddr, $vdata, $saddr$offset$slc">,
+ GlobalSaddrTable<1, opName>,
AtomicNoRet <opName#"_saddr", 0> {
let has_saddr = 1;
let enabled_saddr = 1;
let PseudoInstr = NAME#"_SADDR";
}
+}
+
+multiclass FLAT_Global_Atomic_Pseudo_RTN<
+ string opName,
+ RegisterClass vdst_rc,
+ ValueType vt,
+ SDPatternOperator atomic = null_frag,
+ ValueType data_vt = vt,
+ RegisterClass data_rc = vdst_rc> {
+
+ def _RTN : FLAT_AtomicRet_Pseudo <opName,
+ (outs vdst_rc:$vdst),
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
+ " $vdst, $vaddr, $vdata, off$offset glc$slc",
+ [(set vt:$vdst,
+ (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+ GlobalSaddrTable<0, opName#"_rtn">,
+ AtomicNoRet <opName, 1> {
+ let has_saddr = 1;
+ }
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
" $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
+ GlobalSaddrTable<1, opName#"_rtn">,
AtomicNoRet <opName#"_saddr", 1> {
let has_saddr = 1;
let enabled_saddr = 1;
@@ -323,10 +347,20 @@ multiclass FLAT_Global_Atomic_Pseudo<
}
}
+multiclass FLAT_Global_Atomic_Pseudo<
+ string opName,
+ RegisterClass vdst_rc,
+ ValueType vt,
+ SDPatternOperator atomic = null_frag,
+ ValueType data_vt = vt,
+ RegisterClass data_rc = vdst_rc> :
+ FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>,
+ FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>;
+
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
>;
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
new file mode 100644
index 000000000000..56071d0d2374
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -0,0 +1,446 @@
+//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// The pass combines the V_MOV_B32_dpp instruction with its VALU uses as a DPP
+// src0 operand. If any of the use instructions cannot be combined with the mov,
+// the whole sequence is reverted.
+//
+// $old = ...
+// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
+// dpp_controls..., $bound_ctrl
+// $res = VALU $dpp_value, ...
+//
+// to
+//
+// $res = VALU_DPP $folded_old, $vgpr_to_be_read_from_other_lane, ...,
+// dpp_controls..., $folded_bound_ctrl
+//
+// Combining rules:
+//
+// $bound_ctrl is DPP_BOUND_ZERO, $old is any
+// $bound_ctrl is DPP_BOUND_OFF, $old is 0
+// -> $folded_old = undef, $folded_bound_ctrl = DPP_BOUND_ZERO
+//
+// $bound_ctrl is DPP_BOUND_OFF, $old is undef
+// -> $folded_old = undef, $folded_bound_ctrl = DPP_BOUND_OFF
+//
+// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
+// -> $folded_old = folded value, $folded_bound_ctrl = DPP_BOUND_OFF
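+//
+// Worked example (for illustration only): if $old is known to hold 0 and
+// $bound_ctrl is DPP_BOUND_OFF, an inactive lane contributes the 0 kept in
+// $old, which is exactly what DPP_BOUND_ZERO produces, so the mov can be
+// combined using $folded_old = undef and $folded_bound_ctrl = DPP_BOUND_ZERO
+// (the second case of the first rule above).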
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gcn-dpp-combine"
+
+STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
+
+namespace {
+
+class GCNDPPCombine : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const SIInstrInfo *TII;
+
+ using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
+
+ MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
+
+ RegSubRegPair foldOldOpnd(MachineInstr &OrigMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand &OldOpndValue) const;
+
+ MachineInstr *createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand *OldOpnd,
+ bool BoundCtrlZero) const;
+
+ MachineInstr *createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ bool BoundCtrlZero) const;
+
+ bool hasNoImmOrEqual(MachineInstr &MI,
+ unsigned OpndName,
+ int64_t Value,
+ int64_t Mask = -1) const;
+
+ bool combineDPPMov(MachineInstr &MI) const;
+
+public:
+ static char ID;
+
+ GCNDPPCombine() : MachineFunctionPass(ID) {
+ initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "GCN DPP Combine"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
+
+char GCNDPPCombine::ID = 0;
+
+char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
+
+FunctionPass *llvm::createGCNDPPCombinePass() {
+ return new GCNDPPCombine();
+}
+
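+// Maps Op to its 32-bit DPP counterpart; if Op has no direct DPP form, its
+// VOP e32 equivalent is tried instead. Returns -1 when no DPP variant exists.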
+static int getDPPOp(unsigned Op) {
+ auto DPP32 = AMDGPU::getDPPOp32(Op);
+ if (DPP32 != -1)
+ return DPP32;
+
+ auto E32 = AMDGPU::getVOPe32(Op);
+ return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1;
+}
+
+// Tracks the register operand's definition and returns:
+// 1. the immediate operand used to initialize the register, if found
+// 2. nullptr if the register operand is undef
+// 3. the operand itself otherwise
+MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
+ auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
+ if (!Def)
+ return nullptr;
+
+ switch(Def->getOpcode()) {
+ default: break;
+ case AMDGPU::IMPLICIT_DEF:
+ return nullptr;
+ case AMDGPU::COPY:
+ case AMDGPU::V_MOV_B32_e32: {
+ auto &Op1 = Def->getOperand(1);
+ if (Op1.isImm())
+ return &Op1;
+ break;
+ }
+ }
+ return &OldOpnd;
+}
+
+MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ bool BoundCtrlZero) const {
+ assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
+ assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
+ TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
+
+ auto OrigOp = OrigMI.getOpcode();
+ auto DPPOp = getDPPOp(OrigOp);
+ if (DPPOp == -1) {
+ LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
+ return nullptr;
+ }
+
+ auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
+ OrigMI.getDebugLoc(), TII->get(DPPOp));
+ bool Fail = false;
+ do {
+ auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
+ assert(Dst);
+ DPPInst.add(*Dst);
+ int NumOperands = 1;
+
+ const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
+ if (OldIdx != -1) {
+ assert(OldIdx == NumOperands);
+ assert(isOfRegClass(OldOpndVGPR, AMDGPU::VGPR_32RegClass, *MRI));
+ DPPInst.addReg(OldOpndVGPR.Reg, 0, OldOpndVGPR.SubReg);
+ ++NumOperands;
+ }
+
+ if (auto *Mod0 = TII->getNamedOperand(OrigMI,
+ AMDGPU::OpName::src0_modifiers)) {
+ assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src0_modifiers));
+ assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ DPPInst.addImm(Mod0->getImm());
+ ++NumOperands;
+ }
+ auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
+ assert(Src0);
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
+ LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src0);
+ ++NumOperands;
+
+ if (auto *Mod1 = TII->getNamedOperand(OrigMI,
+ AMDGPU::OpName::src1_modifiers)) {
+ assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src1_modifiers));
+ assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ DPPInst.addImm(Mod1->getImm());
+ ++NumOperands;
+ }
+ if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
+ LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src1);
+ ++NumOperands;
+ }
+
+ if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
+ LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src2);
+ }
+
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
+ DPPInst.addImm(BoundCtrlZero ? 1 : 0);
+ } while (false);
+
+ if (Fail) {
+ DPPInst.getInstr()->eraseFromParent();
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
+ return DPPInst.getInstr();
+}
+
+GCNDPPCombine::RegSubRegPair
+GCNDPPCombine::foldOldOpnd(MachineInstr &OrigMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand &OldOpndValue) const {
+ assert(OldOpndValue.isImm());
+ switch (OrigMI.getOpcode()) {
+ default: break;
+ case AMDGPU::V_MAX_U32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<uint32_t>::max())
+ return OldOpndVGPR;
+ break;
+ case AMDGPU::V_MAX_I32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::max())
+ return OldOpndVGPR;
+ break;
+ case AMDGPU::V_MIN_I32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::min())
+ return OldOpndVGPR;
+ break;
+
+ case AMDGPU::V_MUL_I32_I24_e32:
+ case AMDGPU::V_MUL_U32_U24_e32:
+ if (OldOpndValue.getImm() == 1) {
+ auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
+ assert(Src1 && Src1->isReg());
+ return getRegSubRegPair(*Src1);
+ }
+ break;
+ }
+ return RegSubRegPair();
+}
+
+// Cases to combine:
+// $bound_ctrl is DPP_BOUND_ZERO, $old is any
+// $bound_ctrl is DPP_BOUND_OFF, $old is 0
+// -> $old = undef, $bound_ctrl = DPP_BOUND_ZERO
+
+// $bound_ctrl is DPP_BOUND_OFF, $old is undef
+// -> $old = undef, $bound_ctrl = DPP_BOUND_OFF
+
+// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
+// -> $old = folded value, $bound_ctrl = DPP_BOUND_OFF
+
+MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand *OldOpndValue,
+ bool BoundCtrlZero) const {
+ assert(OldOpndVGPR.Reg);
+ if (!BoundCtrlZero && OldOpndValue) {
+ assert(OldOpndValue->isImm());
+ OldOpndVGPR = foldOldOpnd(OrigMI, OldOpndVGPR, *OldOpndValue);
+ if (!OldOpndVGPR.Reg) {
+ LLVM_DEBUG(dbgs() << " failed: old immediate cannot be folded\n");
+ return nullptr;
+ }
+ }
+ return createDPPInst(OrigMI, MovMI, OldOpndVGPR, BoundCtrlZero);
+}
+
+// Returns true if MI doesn't have an OpndName immediate operand, or if that
+// operand, masked with Mask, equals Value.
+bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
+ int64_t Value, int64_t Mask) const {
+ auto *Imm = TII->getNamedOperand(MI, OpndName);
+ if (!Imm)
+ return true;
+
+ assert(Imm->isImm());
+ return (Imm->getImm() & Mask) == Value;
+}
+
+bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
+ assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
+ auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
+ assert(BCZOpnd && BCZOpnd->isImm());
+ bool BoundCtrlZero = 0 != BCZOpnd->getImm();
+
+ LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
+
+ auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
+ assert(OldOpnd && OldOpnd->isReg());
+ auto OldOpndVGPR = getRegSubRegPair(*OldOpnd);
+ auto *OldOpndValue = getOldOpndValue(*OldOpnd);
+ assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
+ if (OldOpndValue) {
+ if (BoundCtrlZero) {
+ OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef, ignore old opnd
+ OldOpndValue = nullptr;
+ } else {
+ if (!OldOpndValue->isImm()) {
+ LLVM_DEBUG(dbgs() << " failed: old operand isn't an imm or undef\n");
+ return false;
+ }
+ if (OldOpndValue->getImm() == 0) {
+ OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef
+ OldOpndValue = nullptr;
+ BoundCtrlZero = true;
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " old=";
+ if (!OldOpndValue)
+ dbgs() << "undef";
+ else
+ dbgs() << OldOpndValue->getImm();
+ dbgs() << ", bound_ctrl=" << BoundCtrlZero << '\n');
+
+ std::vector<MachineInstr*> OrigMIs, DPPMIs;
+ if (!OldOpndVGPR.Reg) { // OldOpndVGPR = undef
+ OldOpndVGPR = RegSubRegPair(
+ MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
+ auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
+ TII->get(AMDGPU::IMPLICIT_DEF), OldOpndVGPR.Reg);
+ DPPMIs.push_back(UndefInst.getInstr());
+ }
+
+ OrigMIs.push_back(&MovMI);
+ bool Rollback = true;
+ for (auto &Use : MRI->use_nodbg_operands(
+ TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg())) {
+ Rollback = true;
+
+ auto &OrigMI = *Use.getParent();
+ auto OrigOp = OrigMI.getOpcode();
+ if (TII->isVOP3(OrigOp)) {
+ if (!TII->hasVALU32BitEncoding(OrigOp)) {
+ LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n");
+ break;
+ }
+ // check if other than abs|neg modifiers are set (opsel for example)
+ const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
+ if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
+ LLVM_DEBUG(dbgs() << " failed: VOP3 has non-default modifiers\n");
+ break;
+ }
+ } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
+ LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
+ if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
+ if (auto *DPPInst = createDPPInst(OrigMI, MovMI, OldOpndVGPR,
+ OldOpndValue, BoundCtrlZero)) {
+ DPPMIs.push_back(DPPInst);
+ Rollback = false;
+ }
+ } else if (OrigMI.isCommutable() &&
+ &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ auto *BB = OrigMI.getParent();
+ auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
+ BB->insert(OrigMI, NewMI);
+ if (TII->commuteInstruction(*NewMI)) {
+ LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
+ if (auto *DPPInst = createDPPInst(*NewMI, MovMI, OldOpndVGPR,
+ OldOpndValue, BoundCtrlZero)) {
+ DPPMIs.push_back(DPPInst);
+ Rollback = false;
+ }
+ } else
+ LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
+ NewMI->eraseFromParent();
+ } else
+ LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
+ if (Rollback)
+ break;
+ OrigMIs.push_back(&OrigMI);
+ }
+
+ for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
+ MI->eraseFromParent();
+
+ return !Rollback;
+}
+
+bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
+ auto &ST = MF.getSubtarget<GCNSubtarget>();
+ if (!ST.hasDPP() || skipFunction(MF.getFunction()))
+ return false;
+
+ MRI = &MF.getRegInfo();
+ TII = ST.getInstrInfo();
+
+ assert(MRI->isSSA() && "Must be run on SSA");
+
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
+ auto &MI = *I++;
+ if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
+ Changed = true;
+ ++NumDPPMovsCombined;
+ }
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index f236f10ba75a..c6396de89c4f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -215,6 +215,14 @@ void GCNHazardRecognizer::AdvanceCycle() {
if (!CurrCycleInstr)
return;
+  // Do not track non-instructions that do not affect the wait states.
+  // If included, they can overflow the tracking buffer and cause detectable
+  // hazards to be missed.
+ if (CurrCycleInstr->getOpcode() == AMDGPU::IMPLICIT_DEF)
+ return;
+ else if (CurrCycleInstr->isDebugInstr())
+ return;
+
unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
// Keep track of emitted instructions
@@ -253,8 +261,7 @@ int GCNHazardRecognizer::getWaitStatesSince(
return WaitStates;
unsigned Opcode = MI->getOpcode();
- if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF ||
- Opcode == AMDGPU::INLINEASM)
+ if (Opcode == AMDGPU::INLINEASM)
continue;
}
++WaitStates;
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
index 651091d44136..d62dc8d86781 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
@@ -335,7 +335,7 @@ GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
assert(C);
AvailQueue.remove(*C);
auto SU = C->SU;
- LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
advanceToCycle(SU->getHeight());
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 15366d66bd85..8e4cc391dc21 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -434,8 +434,7 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
// Sort recorded regions by pressure - highest at the front
void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
- llvm::sort(Regions.begin(), Regions.end(),
- [&ST, TargetOcc](const Region *R1, const Region *R2) {
+ llvm::sort(Regions, [&ST, TargetOcc](const Region *R1, const Region *R2) {
return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
});
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index 192d534bb9cf..ec6bcae33555 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -258,7 +258,7 @@ GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
assert(C);
RQ.remove(*C);
auto SU = C->SU;
- LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
releaseSuccessors(SU, StepNo);
Schedule.push_back(SU);
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td b/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td
index d76acfa24f90..b8142a4e4ff8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -156,3 +156,8 @@ def : ProcessorModel<"gfx904", SIQuarterSpeedModel,
def : ProcessorModel<"gfx906", SIQuarterSpeedModel,
[FeatureISAVersion9_0_6]
>;
+
+def : ProcessorModel<"gfx909", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_9]
+>;
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index db908368a179..fab0f87dfcbe 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -207,9 +207,12 @@ void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "da");
}
-void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "r128");
+ if (STI.hasFeature(AMDGPU::FeatureR128A16))
+ printNamedBit(MI, OpNo, O, "a16");
+ else
+ printNamedBit(MI, OpNo, O, "r128");
}
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
@@ -236,21 +239,12 @@ void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
O << " vm";
}
-void AMDGPUInstPrinter::printDFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm()) {
- O << " dfmt:";
- printU8ImmDecOperand(MI, OpNo, O);
- }
-}
-
-void AMDGPUInstPrinter::printNFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm()) {
- O << " nfmt:";
- printU8ImmDecOperand(MI, OpNo, O);
+void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (unsigned Val = MI->getOperand(OpNo).getImm()) {
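+    // The immediate packs both formats: dfmt in the low 4 bits, nfmt in the
+    // bits above it (e.g. Val == 0x24 prints " dfmt:4, nfmt:2").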
+ O << " dfmt:" << (Val & 15);
+ O << ", nfmt:" << (Val >> 4);
}
}
@@ -1161,8 +1155,7 @@ void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt, Expcnt, Lgkmcnt;
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 11a496a38b2c..0ba74ca0f3e1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -80,7 +80,7 @@ private:
raw_ostream &O);
void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -90,10 +90,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printExpVM(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printDFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
- void printNFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFORMAT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 07bef9103c0d..c85a1ea5b054 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -46,11 +46,9 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
if (const auto *SymA = Target.getSymA()) {
// SCRATCH_RSRC_DWORD[01] is a special global variable that represents
// the scratch buffer.
- if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD0")
+ if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD0" ||
+ SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD1")
return ELF::R_AMDGPU_ABS32_LO;
-
- if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD1")
- return ELF::R_AMDGPU_ABS32_HI;
}
switch (Target.getAccessVariant()) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 6a41e3f650bc..c17fe126546c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -17,7 +17,9 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
@@ -27,6 +29,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetParser.h"
namespace llvm {
#include "AMDGPUPTNote.h"
@@ -34,90 +37,13 @@ namespace llvm {
using namespace llvm;
using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::HSAMD;
//===----------------------------------------------------------------------===//
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//
-static const struct {
- const char *Name;
- unsigned Mach;
-} MachTable[] = {
- // Radeon HD 2000/3000 Series (R600).
- { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
- { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
- { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
- { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
- // Radeon HD 4000 Series (R700).
- { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
- { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
- { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
- // Radeon HD 5000 Series (Evergreen).
- { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
- { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
- { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
- { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
- { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
- // Radeon HD 6000 Series (Northern Islands).
- { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
- { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
- { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
- { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
- // AMDGCN GFX6.
- { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
- { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
- { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- // AMDGCN GFX7.
- { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
- { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
- { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
- { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
- { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
- { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
- { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
- // AMDGCN GFX8.
- { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
- { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
- { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
- { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
- // AMDGCN GFX9.
- { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
- { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
- { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
- { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
- // Not specified processor.
- { nullptr, ELF::EF_AMDGPU_MACH_NONE }
-};
-
-unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
- auto Entry = MachTable;
- for (; Entry->Name && GPU != Entry->Name; ++Entry)
- ;
- return Entry->Mach;
-}
-
-const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
- auto Entry = MachTable;
- for (; Entry->Name && Mach != Entry->Mach; ++Entry)
- ;
- return Entry->Name;
-}
-
-bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
+bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
return false;
@@ -125,6 +51,104 @@ bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
return EmitHSAMetadata(HSAMetadata);
}
+bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
+ std::shared_ptr<msgpack::Node> HSAMetadataRoot;
+ yaml::Input YIn(HSAMetadataString);
+ YIn >> HSAMetadataRoot;
+ if (YIn.error())
+ return false;
+ return EmitHSAMetadata(HSAMetadataRoot, false);
+}
+
+StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
+ AMDGPU::GPUKind AK;
+
+ switch (ElfMach) {
+ case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
+ case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
+ case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
+ case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
+ case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
+ case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
+ case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
+ case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
+ case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
+ case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
+ case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
+ }
+
+ StringRef GPUName = getArchNameAMDGCN(AK);
+ if (GPUName != "")
+ return GPUName;
+ return getArchNameR600(AK);
+}
+
+unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
+ AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
+ if (AK == AMDGPU::GPUKind::GK_NONE)
+ AK = parseArchR600(GPU);
+
+ switch (AK) {
+ case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
+ case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
+ case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
+ case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
+ case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
+ case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
+ case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
+ case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
+ case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
+ case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
+ case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
+ case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
+ case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
+ case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
+ case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
+ case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
+ case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
+ case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
+ case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
+ case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
+ case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
+ case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
+ case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
+ case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
+ case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
+ case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
+ case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
+ case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
+ case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
+ case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
+ case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
+ case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
+ case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
+ }
+
+ llvm_unreachable("unknown GPU");
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
//===----------------------------------------------------------------------===//
@@ -183,9 +207,26 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
if (HSAMD::toString(HSAMetadata, HSAMetadataString))
return false;
- OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
+ OS << '\t' << AssemblerDirectiveBegin << '\n';
OS << HSAMetadataString << '\n';
- OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
+ OS << '\t' << AssemblerDirectiveEnd << '\n';
+ return true;
+}
+
+bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
+ std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+ V3::MetadataVerifier Verifier(Strict);
+ if (!Verifier.verify(*HSAMetadataRoot))
+ return false;
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << HSAMetadataRoot;
+
+ OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
+ OS << StrOS.str() << '\n';
+ OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
return true;
}
@@ -203,70 +244,59 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
- amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
-
- IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
+ IsaVersion IVersion = getIsaVersion(STI.getCPU());
OS << "\t.amdhsa_kernel " << KernelName << '\n';
-#define PRINT_IF_NOT_DEFAULT(STREAM, DIRECTIVE, KERNEL_DESC, \
- DEFAULT_KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
- if (AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) != \
- AMDHSA_BITS_GET(DEFAULT_KERNEL_DESC.MEMBER_NAME, FIELD_NAME)) \
- STREAM << "\t\t" << DIRECTIVE << " " \
- << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
-
- if (KD.group_segment_fixed_size != DefaultKD.group_segment_fixed_size)
- OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
- << '\n';
- if (KD.private_segment_fixed_size != DefaultKD.private_segment_fixed_size)
- OS << "\t\t.amdhsa_private_segment_fixed_size "
- << KD.private_segment_fixed_size << '\n';
-
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_queue_ptr", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_id", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_private_segment_size", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, DefaultKD,
+#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
+ STREAM << "\t\t" << DIRECTIVE << " " \
+ << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
+
+ OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
+ << '\n';
+ OS << "\t\t.amdhsa_private_segment_fixed_size "
+ << KD.private_segment_fixed_size << '\n';
+
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ PRINT_FIELD(
+ OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_info", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_vgpr_workitem_id", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
+ PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
// These directives are required.
OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
@@ -279,54 +309,52 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_32", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_16_64", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_32", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_16_64", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_dx10_clamp", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_ieee_mode", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
+ PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
+ PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
+ PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
+ PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
+ PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
+ PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
if (IVersion.Major >= 9)
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_fp16_overflow", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, DefaultKD,
+ PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+ PRINT_FIELD(
+ OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_denorm_src", KD, DefaultKD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_div_zero", KD, DefaultKD,
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
+ PRINT_FIELD(
+ OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_overflow", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_underflow", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_inexact", KD, DefaultKD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_int_div_zero", KD, DefaultKD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
-#undef PRINT_IF_NOT_DEFAULT
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
+ PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
+#undef PRINT_FIELD
OS << "\t.end_amdhsa_kernel\n";
}
@@ -342,12 +370,16 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
unsigned EFlags = MCA.getELFHeaderEFlags();
EFlags &= ~ELF::EF_AMDGPU_MACH;
- EFlags |= getMACH(STI.getCPU());
+ EFlags |= getElfMach(STI.getCPU());
EFlags &= ~ELF::EF_AMDGPU_XNACK;
if (AMDGPU::hasXNACK(STI))
EFlags |= ELF::EF_AMDGPU_XNACK;
+ EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
+ if (AMDGPU::hasSRAMECC(STI))
+ EFlags |= ELF::EF_AMDGPU_SRAM_ECC;
+
MCA.setELFHeaderEFlags(EFlags);
}
@@ -355,13 +387,13 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
-void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
- const MCExpr *DescSZ, unsigned NoteType,
+void AMDGPUTargetELFStreamer::EmitNote(
+ StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
function_ref<void(MCELFStreamer &)> EmitDesc) {
auto &S = getStreamer();
auto &Context = S.getContext();
- auto NameSZ = sizeof(ElfNote::NoteName);
+ auto NameSZ = Name.size() + 1;
S.PushSection();
S.SwitchSection(Context.getELFSection(
@@ -369,7 +401,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
S.EmitIntValue(NameSZ, 4); // namesz
S.EmitValue(DescSZ, 4); // descz
S.EmitIntValue(NoteType, 4); // type
- S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name
+ S.EmitBytes(Name); // name
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
EmitDesc(S); // desc
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
@@ -381,14 +413,11 @@ void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
uint32_t Major, uint32_t Minor) {
- EmitAMDGPUNote(
- MCConstantExpr::create(8, getContext()),
- ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
- [&](MCELFStreamer &OS){
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
+ ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ });
}
void
@@ -404,21 +433,18 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize;
- EmitAMDGPUNote(
- MCConstantExpr::create(DescSZ, getContext()),
- ElfNote::NT_AMDGPU_HSA_ISA,
- [&](MCELFStreamer &OS) {
- OS.EmitIntValue(VendorNameSize, 2);
- OS.EmitIntValue(ArchNameSize, 2);
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- OS.EmitIntValue(Stepping, 4);
- OS.EmitBytes(VendorName);
- OS.EmitIntValue(0, 1); // NULL terminate VendorName
- OS.EmitBytes(ArchName);
- OS.EmitIntValue(0, 1); // NULL terminte ArchName
- }
- );
+ EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
+ ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
+ OS.EmitIntValue(VendorNameSize, 2);
+ OS.EmitIntValue(ArchNameSize, 2);
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ OS.EmitIntValue(Stepping, 4);
+ OS.EmitBytes(VendorName);
+ OS.EmitIntValue(0, 1); // NULL terminate VendorName
+ OS.EmitBytes(ArchName);
+             OS.EmitIntValue(0, 1); // NULL terminate ArchName
+ });
}
void
@@ -447,15 +473,41 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
- EmitAMDGPUNote(
- DescSZ,
- ELF::NT_AMD_AMDGPU_ISA,
- [&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(IsaVersionString);
- OS.EmitLabel(DescEnd);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(IsaVersionString);
+ OS.EmitLabel(DescEnd);
+ });
+ return true;
+}
+
+bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
+ std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+ V3::MetadataVerifier Verifier(Strict);
+ if (!Verifier.verify(*HSAMetadataRoot))
+ return false;
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ msgpack::Writer MPWriter(StrOS);
+ HSAMetadataRoot->write(MPWriter);
+
+ // Create two labels to mark the beginning and end of the desc field
+ // and a MCExpr to calculate the size of the desc field.
+ auto &Context = getContext();
+ auto *DescBegin = Context.createTempSymbol();
+ auto *DescEnd = Context.createTempSymbol();
+ auto *DescSZ = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(DescEnd, Context),
+ MCSymbolRefExpr::create(DescBegin, Context), Context);
+
+ EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(StrOS.str());
+ OS.EmitLabel(DescEnd);
+ });
return true;
}
@@ -474,28 +526,24 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
- EmitAMDGPUNote(
- DescSZ,
- ELF::NT_AMD_AMDGPU_HSA_METADATA,
- [&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(HSAMetadataString);
- OS.EmitLabel(DescEnd);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(HSAMetadataString);
+ OS.EmitLabel(DescEnd);
+ });
return true;
}
bool AMDGPUTargetELFStreamer::EmitPALMetadata(
const PALMD::Metadata &PALMetadata) {
- EmitAMDGPUNote(
- MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
- ELF::NT_AMD_AMDGPU_PAL_METADATA,
- [&](MCELFStreamer &OS){
- for (auto I : PALMetadata)
- OS.EmitIntValue(I, sizeof(uint32_t));
- }
- );
+ EmitNote(ElfNote::NoteNameV2,
+ MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t),
+ getContext()),
+ ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) {
+ for (auto I : PALMetadata)
+ OS.EmitIntValue(I, sizeof(uint32_t));
+ });
return true;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 472da1b73593..9a807c804f9f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -11,6 +11,7 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#include "AMDKernelCodeT.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDGPUMetadata.h"
@@ -31,13 +32,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
protected:
MCContext &getContext() const { return Streamer.getContext(); }
- /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
- unsigned getMACH(StringRef GPU) const;
-
public:
- /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
- static const char *getMachName(unsigned Mach);
-
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
@@ -58,7 +53,20 @@ public:
virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
/// \returns True on success, false on failure.
- virtual bool EmitHSAMetadata(StringRef HSAMetadataString);
+ virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString);
+
+ /// \returns True on success, false on failure.
+ virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString);
+
+ /// Emit HSA Metadata
+ ///
+ /// When \p Strict is true, known metadata elements must already be
+ /// well-typed. When \p Strict is false, known types are inferred and
+ /// the \p HSAMetadata structure is updated with the correct types.
+ ///
+ /// \returns True on success, false on failure.
+ virtual bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) = 0;
/// \returns True on success, false on failure.
virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
@@ -71,6 +79,9 @@ public:
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
bool ReserveXNACK) = 0;
+
+ static StringRef getArchNameFromElfMach(unsigned ElfMach);
+ static unsigned getElfMach(StringRef GPU);
};
class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
@@ -95,6 +106,10 @@ public:
bool EmitISAVersion(StringRef IsaVersionString) override;
/// \returns True on success, false on failure.
+ bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) override;
+
+ /// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
/// \returns True on success, false on failure.
@@ -110,8 +125,8 @@ public:
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
MCStreamer &Streamer;
- void EmitAMDGPUNote(const MCExpr *DescSize, unsigned NoteType,
- function_ref<void(MCELFStreamer &)> EmitDesc);
+ void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
+ function_ref<void(MCELFStreamer &)> EmitDesc);
public:
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
@@ -135,6 +150,10 @@ public:
bool EmitISAVersion(StringRef IsaVersionString) override;
/// \returns True on success, false on failure.
+ bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) override;
+
+ /// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
/// \returns True on success, false on failure.
diff --git a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 44c2d366e461..1c68dbd78e75 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -29,6 +29,7 @@ class MIMGBaseOpcode {
bit Atomic = 0;
bit AtomicX2 = 0; // (f)cmpswap
bit Sampler = 0;
+ bit Gather4 = 0;
bits<8> NumExtraArgs = 0;
bit Gradients = 0;
bit Coordinates = 1;
@@ -43,7 +44,7 @@ def MIMGBaseOpcode : GenericEnum {
def MIMGBaseOpcodesTable : GenericTable {
let FilterClass = "MIMGBaseOpcode";
let CppTypeName = "MIMGBaseOpcodeInfo";
- let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
+ let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler", "Gather4",
"NumExtraArgs", "Gradients", "Coordinates", "LodOrClampOrMip",
"HasD16"];
GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
@@ -141,7 +142,7 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -179,6 +180,8 @@ multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16, bit mip = 0,
defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 0>;
let VDataDwords = 4 in
defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 0>;
+ let VDataDwords = 8 in
+ defm _V8 : MIMG_NoSampler_Src_Helper <op, asm, VReg_256, 0>;
}
}
@@ -199,7 +202,7 @@ class MIMG_Store_Helper <bits<7> op, string asm,
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -252,7 +255,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
}
@@ -316,7 +319,7 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -411,6 +414,8 @@ multiclass MIMG_Sampler <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
defm _V3 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_96>;
let VDataDwords = 4 in
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128>;
+ let VDataDwords = 8 in
+ defm _V8 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_256>;
}
}
@@ -421,6 +426,7 @@ multiclass MIMG_Gather <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
string asm = "image_gather4"#sample.LowerCaseMod> {
def "" : MIMG_Sampler_BaseOpcode<sample> {
let HasD16 = 1;
+ let Gather4 = 1;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
@@ -429,6 +435,8 @@ multiclass MIMG_Gather <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */
let VDataDwords = 4 in
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128, 1>;
+ let VDataDwords = 8 in
+ defm _V8 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_256>;
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 1683fe6c9a57..679cf18d2c20 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -226,11 +226,11 @@ private:
// occur in the same basic block as its definition, because
// it is illegal for the scheduler to schedule them in
// different blocks.
- if (UseI->readsRegister(MOI->getReg()))
+ if (UseI->readsRegister(MOI->getReg(), &TRI))
LastUseCount = AluInstCount;
// Exit early if the current use kills the register
- if (UseI != Def && UseI->killsRegister(MOI->getReg()))
+ if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI))
break;
}
if (LastUseCount)
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index e00dffc4be99..e2a0f05d2b34 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -589,7 +589,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
case Intrinsic::r600_implicitarg_ptr: {
- MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
return DAG.getConstant(ByteOffset, DL, PtrVT);
}
@@ -741,12 +741,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
+ if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
const DataLayout &DL = DAG.getDataLayout();
const GlobalValue *GV = GSD->getGlobal();
- MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
@@ -903,7 +903,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.PARAM_I_ADDRESS);
+ AMDGPUAS::PARAM_I_ADDRESS);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
@@ -1141,7 +1141,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
//TODO: Who creates the i8 stores?
assert(Store->isTruncatingStore()
|| Store->getValue().getValueType() == MVT::i8);
- assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
+ assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
SDValue Mask;
if (Store->getMemoryVT() == MVT::i8) {
@@ -1175,7 +1175,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
// Load dword
// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(UndefValue::get(
- Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
+ Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
Chain = Dst.getValue(1);
@@ -1241,9 +1241,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
// Neither LOCAL nor PRIVATE can do vectors at the moment
- if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
- if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
StoreNode->isTruncatingStore()) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
@@ -1267,7 +1267,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
DAG.getConstant(2, DL, PtrVT));
- if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
// It is beneficial to create MSKOR here instead of combiner to avoid
// artificial dependencies introduced by RMW
if (StoreNode->isTruncatingStore()) {
@@ -1320,7 +1320,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
- if (AS != AMDGPUASI.PRIVATE_ADDRESS)
+ if (AS != AMDGPUAS::PRIVATE_ADDRESS)
return SDValue();
if (MemVT.bitsLT(MVT::i32))
@@ -1403,7 +1403,7 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
// Load dword
// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(UndefValue::get(
- Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
+ Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
// Get offset within the register.
@@ -1441,7 +1441,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = LoadNode->getMemoryVT();
ISD::LoadExtType ExtType = LoadNode->getExtensionType();
- if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
return lowerPrivateExtLoad(Op, DAG);
}
@@ -1451,8 +1451,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = LoadNode->getChain();
SDValue Ptr = LoadNode->getBasePtr();
- if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
- LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
return scalarizeVectorLoad(LoadNode, DAG);
}
@@ -1473,7 +1473,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(4, DL, MVT::i32)),
DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
+ AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
);
}
@@ -1509,7 +1509,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(MergedValues, DL);
}
- if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
return SDValue();
}
@@ -1606,7 +1606,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.PARAM_I_ADDRESS);
+ AMDGPUAS::PARAM_I_ADDRESS);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
@@ -1656,7 +1656,7 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const {
// Local and Private addresses do not handle vectors. Limit to i32
- if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
return (MemVT.getSizeInBits() <= 32);
}
return true;
@@ -1685,14 +1685,15 @@ bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
static SDValue CompactSwizzlableVector(
SelectionDAG &DAG, SDValue VectorEntry,
DenseMap<unsigned, unsigned> &RemapSwizzle) {
- assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
assert(RemapSwizzle.empty());
- SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
- };
+
+ SDLoc DL(VectorEntry);
+ EVT EltTy = VectorEntry.getValueType().getVectorElementType();
+
+ SDValue NewBldVec[4];
+ for (unsigned i = 0; i < 4; i++)
+ NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
+ DAG.getIntPtrConstant(i, DL));
for (unsigned i = 0; i < 4; i++) {
if (NewBldVec[i].isUndef())
@@ -1727,15 +1728,17 @@ static SDValue CompactSwizzlableVector(
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
DenseMap<unsigned, unsigned> &RemapSwizzle) {
- assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
assert(RemapSwizzle.empty());
- SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
- };
- bool isUnmovable[4] = { false, false, false, false };
+
+ SDLoc DL(VectorEntry);
+ EVT EltTy = VectorEntry.getValueType().getVectorElementType();
+
+ SDValue NewBldVec[4];
+ bool isUnmovable[4] = {false, false, false, false};
+ for (unsigned i = 0; i < 4; i++)
+ NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
+ DAG.getIntPtrConstant(i, DL));
+
for (unsigned i = 0; i < 4; i++) {
RemapSwizzle[i] = i;
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
@@ -1766,7 +1769,6 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
SelectionDAG &DAG,
const SDLoc &DL) const {
- assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
// Old -> New swizzle values
DenseMap<unsigned, unsigned> SwizzleRemap;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 5397e779474c..9cc3e5f3c314 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -229,11 +229,11 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
}
bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
- return MI.findRegisterUseOperandIdx(R600::AR_X) != -1;
+ return MI.findRegisterUseOperandIdx(R600::AR_X, false, &RI) != -1;
}
bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
- return MI.findRegisterDefOperandIdx(R600::AR_X) != -1;
+ return MI.findRegisterDefOperandIdx(R600::AR_X, false, false, &RI) != -1;
}
bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
@@ -1500,19 +1500,19 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
}
unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const {
+ unsigned Kind) const {
switch (Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
- return ST.getAMDGPUAS().PRIVATE_ADDRESS;
+ return AMDGPUAS::PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
- return ST.getAMDGPUAS().CONSTANT_ADDRESS;
+ return AMDGPUAS::CONSTANT_ADDRESS;
}
+
llvm_unreachable("Invalid pseudo source kind");
- return ST.getAMDGPUAS().PRIVATE_ADDRESS;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index 7a3dece31665..e6e34dc125f4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -324,7 +324,7 @@ public:
}
unsigned getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const override;
+ unsigned Kind) const override;
};
namespace R600 {
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
index 7bf174f4cd86..10e873755222 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -299,7 +299,7 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
- (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }]
+ (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
>;
def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>;
@@ -309,8 +309,8 @@ def vtx_id3_load : LoadParamFrag<load>;
class LoadVtxId1 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
!isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout())));
}]>;
@@ -322,7 +322,7 @@ def vtx_id1_load : LoadVtxId1 <load>;
class LoadVtxId2 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout()));
}]>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
index a1429a2ac50f..7769a35aadce 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -127,13 +127,13 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
LLVM_DEBUG(if (SU) {
dbgs() << " ** Pick node **\n";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
} else {
dbgs() << "NO NODE \n";
for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
const SUnit &S = DAG->SUnits[i];
if (!S.isScheduled)
- S.dump(DAG);
+ DAG->dumpNode(S);
}
});
@@ -188,11 +188,11 @@ isPhysicalRegCopy(MachineInstr *MI) {
}
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
- LLVM_DEBUG(dbgs() << "Top Releasing "; SU->dump(DAG););
+ LLVM_DEBUG(dbgs() << "Top Releasing "; DAG->dumpNode(*SU));
}
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
- LLVM_DEBUG(dbgs() << "Bottom Releasing "; SU->dump(DAG););
+ LLVM_DEBUG(dbgs() << "Bottom Releasing "; DAG->dumpNode(*SU));
if (isPhysicalRegCopy(SU->getInstr())) {
PhysicalRegCopy.push_back(SU);
return;
@@ -236,6 +236,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
// MI will become a KILL, don't consider it in scheduling
return AluDiscarded;
}
+ break;
default:
break;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
new file mode 100644
index 000000000000..69cafef4a351
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
@@ -0,0 +1,181 @@
+//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Any MIMG instructions that use tfe or lwe require an initialization of the
+/// result register that will be written in the case of a memory access failure.
+/// The required code is also added to tie this init code to the result of the
+/// img instruction.
+///
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-img-init"
+
+using namespace llvm;
+
+namespace {
+
+class SIAddIMGInit : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIAddIMGInit() : MachineFunctionPass(ID) {
+ initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
+
+char SIAddIMGInit::ID = 0;
+
+char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
+
+FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
+
+bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *RI = ST.getRegisterInfo();
+ bool Changed = false;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ auto Opcode = MI.getOpcode();
+ if (TII->isMIMG(Opcode) && !MI.mayStore()) {
+ MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
+ MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
+ MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
+
+ // Check for instructions that don't have tfe or lwe fields.
+ // There shouldn't be any at this point.
+ assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
+
+ unsigned TFEVal = TFE->getImm();
+ unsigned LWEVal = LWE->getImm();
+ unsigned D16Val = D16 ? D16->getImm() : 0;
+
+ if (TFEVal || LWEVal) {
+ // At least one of TFE or LWE is non-zero.
+ // We have to insert a suitable initialization of the result value and
+ // tie this to the dest of the image instruction.
+
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ int DstIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
+
+ // Calculate which dword we have to initialize to 0.
+ MachineOperand *MO_Dmask =
+ TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
+
+ // Check that the dmask operand is found.
+ assert(MO_Dmask && "Expected dmask operand in instruction");
+
+ unsigned dmask = MO_Dmask->getImm();
+ // Determine the number of active lanes taking into account the
+ // Gather4 special case
+ unsigned ActiveLanes =
+ TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
+
+ // Subreg indices are counted from 1.
+ // When D16 is set, we want the next whole VGPR after the write data.
+ static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
+
+ bool Packed = !ST.hasUnpackedD16VMem();
+
+ unsigned InitIdx =
+ D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
+
+ // Abandon the attempt if the dst size isn't large enough
+ // - this is in fact an error, but it is picked up elsewhere and
+ //   reported correctly.
+ uint32_t DstSize =
+ RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
+ if (DstSize < InitIdx)
+ continue;
+
+ // Create a register for the initialization value.
+ unsigned PrevDst =
+ MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
+ unsigned NewDst = 0; // Final initialized value will be in here
+
+ // If the PRTStrictNull feature is enabled (the default), initialize all
+ // the result registers to 0; otherwise initialize just the error
+ // indication register (VGPRn+1).
+ unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
+ unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
+
+ if (DstSize == 1) {
+ // In this case we can just initialize the result directly
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
+ .addImm(0);
+ NewDst = PrevDst;
+ } else {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
+ for (; SizeLeft; SizeLeft--, CurrIdx++) {
+ NewDst =
+ MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
+ // Initialize dword
+ unsigned SubReg =
+ MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
+ .addImm(0);
+ // Insert into the super-reg
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
+ .addReg(PrevDst)
+ .addReg(SubReg)
+ .addImm(CurrIdx);
+
+ PrevDst = NewDst;
+ }
+ }
+
+ // Add as an implicit operand
+ MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
+
+ // Tie the just added implicit operand to the dst
+ MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
+
+ Changed = true;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
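For reference, the InitIdx arithmetic above reduces to a small closed-form count. A minimal standalone sketch (the helper name and free-function form are illustrative and not part of the patch; the formula mirrors the ActiveLanes/InitIdx computation in runOnMachineFunction):

#include <bitset>

// Number of result dwords that need a zero-init when TFE/LWE is set and
// PRTStrictNull is enabled: the dwords selected by dmask (always 4 for
// gather4), halved and rounded up for packed D16, plus one extra dword for
// the TFE/LWE status write.
static unsigned numDwordsToInit(unsigned DMask, bool IsGather4, bool D16,
                                bool PackedD16) {
  unsigned ActiveLanes =
      IsGather4 ? 4u : static_cast<unsigned>(std::bitset<32>(DMask).count());
  unsigned DataDwords =
      (D16 && PackedD16) ? (ActiveLanes + 1) >> 1 : ActiveLanes;
  return DataDwords + 1;
}

For example, dmask = 0b0111 with packed D16 gives (3+1)/2 + 1 = 3 dwords, which is why the pass abandons an instruction whose destination register class holds fewer dwords than that (the error is reported elsewhere).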
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 74f1bd8fb986..98e9ea662324 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
@@ -52,7 +52,7 @@ using StackEntry = std::pair<BasicBlock *, Value *>;
using StackVector = SmallVector<StackEntry, 16>;
class SIAnnotateControlFlow : public FunctionPass {
- DivergenceAnalysis *DA;
+ LegacyDivergenceAnalysis *DA;
Type *Boolean;
Type *Void;
@@ -66,9 +66,7 @@ class SIAnnotateControlFlow : public FunctionPass {
Function *If;
Function *Else;
- Function *Break;
Function *IfBreak;
- Function *ElseBreak;
Function *Loop;
Function *EndCf;
@@ -95,8 +93,7 @@ class SIAnnotateControlFlow : public FunctionPass {
Value *
handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
- BranchInst *Term,
- SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions);
+ BranchInst *Term);
void handleLoop(BranchInst *Term);
@@ -116,7 +113,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
@@ -127,7 +124,7 @@ public:
INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
@@ -149,9 +146,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
- Break = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_break);
IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
- ElseBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else_break);
Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
return false;
@@ -160,7 +155,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
/// Is the branch condition uniform or did the StructurizeCFG pass
/// consider it as such?
bool SIAnnotateControlFlow::isUniform(BranchInst *T) {
- return DA->isUniform(T->getCondition()) ||
+ return DA->isUniform(T) ||
T->getMetadata("structurizecfg.uniform") != nullptr;
}
@@ -227,76 +222,7 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
/// Recursively handle the condition leading to a loop
Value *SIAnnotateControlFlow::handleLoopCondition(
- Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term,
- SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions) {
- // Only search through PHI nodes which are inside the loop. If we try this
- // with PHI nodes that are outside of the loop, we end up inserting new PHI
- // nodes outside of the loop which depend on values defined inside the loop.
- // This will break the module with
- // 'Instruction does not dominate all users!' errors.
- PHINode *Phi = nullptr;
- if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {
- BasicBlock *Parent = Phi->getParent();
- PHINode *NewPhi = PHINode::Create(Int64, 0, "loop.phi", &Parent->front());
- Value *Ret = NewPhi;
-
- // Handle all non-constant incoming values first
- for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = Phi->getIncomingValue(i);
- BasicBlock *From = Phi->getIncomingBlock(i);
- if (isa<ConstantInt>(Incoming)) {
- NewPhi->addIncoming(Broken, From);
- continue;
- }
-
- Phi->setIncomingValue(i, BoolFalse);
- Value *PhiArg = handleLoopCondition(Incoming, Broken, L,
- Term, LoopPhiConditions);
- NewPhi->addIncoming(PhiArg, From);
- }
-
- BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
-
- for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = Phi->getIncomingValue(i);
- if (Incoming != BoolTrue)
- continue;
-
- BasicBlock *From = Phi->getIncomingBlock(i);
- if (From == IDom) {
- // We're in the following situation:
- // IDom/From
- // | \
- // | If-block
- // | /
- // Parent
- // where we want to break out of the loop if the If-block is not taken.
- // Due to the depth-first traversal, there should be an end.cf
- // intrinsic in Parent, and we insert an else.break before it.
- //
- // Note that the end.cf need not be the first non-phi instruction
- // of parent, particularly when we're dealing with a multi-level
- // break, but it should occur within a group of intrinsic calls
- // at the beginning of the block.
- CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
- while (OldEnd && OldEnd->getCalledFunction() != EndCf)
- OldEnd = dyn_cast<CallInst>(OldEnd->getNextNode());
- if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
- Value *Args[] = { OldEnd->getArgOperand(0), NewPhi };
- Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
- continue;
- }
- }
-
- TerminatorInst *Insert = From->getTerminator();
- Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
- NewPhi->setIncomingValue(i, PhiArg);
- }
-
- LoopPhiConditions.push_back(WeakTrackingVH(Phi));
- return Ret;
- }
-
+ Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
Instruction *Insert;
@@ -335,21 +261,15 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());
- SmallVector<WeakTrackingVH, 8> LoopPhiConditions;
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- Value *Arg = handleLoopCondition(Cond, Broken, L, Term, LoopPhiConditions);
+ Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
for (BasicBlock *Pred : predecessors(Target))
Broken->addIncoming(Pred == BB ? Arg : Int64Zero, Pred);
Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
- for (WeakTrackingVH Val : llvm::reverse(LoopPhiConditions)) {
- if (PHINode *Cond = cast_or_null<PHINode>(Val))
- eraseIfUnused(Cond);
- }
-
push(Term->getSuccessor(0), Arg);
}
@@ -372,7 +292,8 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
Preds.push_back(Pred);
}
- BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false);
+ BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, nullptr,
+ false);
}
Value *Exec = popSaved();
@@ -386,7 +307,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
index a6d28d6999e5..7f6abc34cff3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -88,7 +88,10 @@ enum : uint64_t {
IsPacked = UINT64_C(1) << 49,
// Is a D16 buffer instruction.
- D16Buf = UINT64_C(1) << 50
+ D16Buf = UINT64_C(1) << 50,
+
+ // Uses floating point double precision rounding mode
+ FPDPRounding = UINT64_C(1) << 51
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
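The new FPDPRounding flag is one more bit in the 64-bit TSFlags word carried by every instruction descriptor, alongside IsPacked and D16Buf above. A minimal self-contained sketch of how such a bit is tested (the helper name is made up for illustration; the bit value is copied from the hunk above):

#include <cstdint>

// The new bit, as declared in the hunk above.
constexpr uint64_t FPDPRoundingBit = UINT64_C(1) << 51;

// Sketch: each MCInstrDesc carries a uint64_t TSFlags word; the backend's
// other SIInstrFlags bits are queried the same way.
static bool usesFPDPRounding(uint64_t TSFlags) {
  return (TSFlags & FPDPRoundingBit) != 0;
}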
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 566e0d3febc7..809f5bab4693 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -183,13 +183,15 @@ getCopyRegClasses(const MachineInstr &Copy,
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const SIRegisterInfo &TRI) {
- return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
+ return SrcRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(DstRC) &&
+ TRI.hasVGPRs(SrcRC);
}
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const SIRegisterInfo &TRI) {
- return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
+ return DstRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(SrcRC) &&
+ TRI.hasVGPRs(DstRC);
}
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
@@ -327,9 +329,7 @@ static bool phiHasBreakDef(const MachineInstr &PHI,
switch (DefInstr->getOpcode()) {
default:
break;
- case AMDGPU::SI_BREAK:
case AMDGPU::SI_IF_BREAK:
- case AMDGPU::SI_ELSE_BREAK:
return true;
case AMDGPU::PHI:
if (phiHasBreakDef(*DefInstr, MRI, Visited))
@@ -599,7 +599,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
unsigned SrcReg = MI.getOperand(1).getReg();
if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
break;
}
@@ -614,7 +614,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MI.setDesc(TII->get(SMovOp));
break;
}
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
} else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
}
@@ -677,7 +677,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
SmallSet<unsigned, 8> Visited;
if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
}
break;
}
@@ -690,7 +690,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
break;
case AMDGPU::INSERT_SUBREG: {
const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
@@ -700,7 +700,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
if (TRI->isSGPRClass(DstRC) &&
(TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
}
break;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
index 5d613d8874fa..7761418c5336 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
@@ -10,7 +10,7 @@
/// \file
/// Computations in WWM can overwrite values in inactive channels for
/// variables that the register allocator thinks are dead. This pass adds fake
-/// uses of those variables to WWM instructions to make sure that they aren't
+/// uses of those variables to their def(s) to make sure that they aren't
/// overwritten.
///
/// As an example, consider this snippet:
@@ -29,25 +29,44 @@
/// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
/// it would clobber even the inactive channels for which the if-condition is
/// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
-/// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the
+/// of %vgpr0 to its def to make sure they aren't allocated to the
/// same register.
///
/// In general, we need to figure out what registers might have their inactive
/// channels which are eventually used accidentally clobbered by a WWM
-/// instruction. We approximate this using two conditions:
+/// instruction. We do that by spotting three separate cases of registers:
///
-/// 1. A definition of the variable reaches the WWM instruction.
-/// 2. The variable would be live at the WWM instruction if all its defs were
-/// partial defs (i.e. considered as a use), ignoring normal uses.
+/// 1. A "then phi": the value resulting from phi elimination of a phi node at
+/// the end of an if..endif. If there is WWM code in the "then", then we
+/// make the def at the end of the "then" branch a partial def by adding an
+/// implicit use of the register.
///
-/// If a register matches both conditions, then we add an implicit use of it to
-/// the WWM instruction. Condition #2 is the heart of the matter: every
-/// definition is really a partial definition, since every VALU instruction is
-/// implicitly predicated. We can usually ignore this, but WWM forces us not
-/// to. Condition #1 prevents false positives if the variable is undefined at
-/// the WWM instruction anyways. This is overly conservative in certain cases,
-/// especially in uniform control flow, but this is a workaround anyways until
-/// LLVM gains the notion of predicated uses and definitions of variables.
+/// 2. A "loop exit register": a value written inside a loop but used outside the
+/// loop, where there is WWM code inside the loop (the case in the example
+/// above). We add an implicit_def of the register in the loop pre-header,
+/// and make the original def a partial def by adding an implicit use of the
+/// register.
+///
+/// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
+/// in a loop header. If there is WWM code inside the loop, then we make all
+/// defs inside the loop partial defs by adding an implicit use of the
+/// register on each one.
+///
+/// Note that we do not need to consider an if..else..endif phi. We only need to
+/// consider non-uniform control flow, and control flow structurization would
+/// have transformed a non-uniform if..else..endif into two if..endifs.
+///
+/// The analysis to detect these cases relies on a property of the MIR
+/// arising from this pass running straight after PHIElimination and before any
+/// coalescing: that any virtual register with more than one definition must be
+/// the new register added to lower a phi node by PHIElimination.
+///
+/// FIXME: We should detect whether a register in one of the above categories is
+/// already live at the WWM code before deciding to add the implicit uses to
+/// synthesize its liveness.
+///
+/// FIXME: I believe this whole scheme may be flawed due to the possibility of
+/// the register allocator doing live interval splitting.
///
//===----------------------------------------------------------------------===//
@@ -59,7 +78,9 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -71,10 +92,18 @@ namespace {
class SIFixWWMLiveness : public MachineFunctionPass {
private:
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *LoopInfo;
LiveIntervals *LIS = nullptr;
+ const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
+ std::vector<MachineInstr *> WWMs;
+ std::vector<MachineOperand *> ThenDefs;
+ std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
+ std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;
+
public:
static char ID;
@@ -84,13 +113,11 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- bool runOnWWMInstruction(MachineInstr &MI);
-
- void addDefs(const MachineInstr &MI, SparseBitVector<> &set);
-
StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addRequiredID(MachineLoopInfoID);
// Should preserve the same set that TwoAddressInstructions does.
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
@@ -100,11 +127,21 @@ public:
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+private:
+ void processDef(MachineOperand &DefOpnd);
+ bool processThenDef(MachineOperand *DefOpnd);
+ bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
+ bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
};
} // End anonymous namespace.
-INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
+ "SI fix WWM liveness", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
"SI fix WWM liveness", false, false)
char SIFixWWMLiveness::ID = 0;
@@ -115,89 +152,267 @@ FunctionPass *llvm::createSIFixWWMLivenessPass() {
return new SIFixWWMLiveness();
}
-void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs)
-{
- for (const MachineOperand &Op : MI.defs()) {
- if (Op.isReg()) {
- unsigned Reg = Op.getReg();
- if (TRI->isVGPR(*MRI, Reg))
- Regs.set(Reg);
- }
- }
-}
+bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");
+ bool Modified = false;
+
+ // This doesn't actually need LiveIntervals, but we can preserve them.
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
-bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) {
- MachineBasicBlock *MBB = WWM.getParent();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- // Compute the registers that are live out of MI by figuring out which defs
- // are reachable from MI.
- SparseBitVector<> LiveOut;
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+ MRI = &MF.getRegInfo();
- for (auto II = MachineBasicBlock::iterator(WWM), IE =
- MBB->end(); II != IE; ++II) {
- addDefs(*II, LiveOut);
- }
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ LoopInfo = &getAnalysis<MachineLoopInfo>();
- for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB),
- E = df_end(MBB);
- I != E; ++I) {
- for (const MachineInstr &MI : **I) {
- addDefs(MI, LiveOut);
+ // Scan the function to find the WWM sections and the candidate registers for
+ // having liveness modified.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() == AMDGPU::EXIT_WWM)
+ WWMs.push_back(&MI);
+ else {
+ for (MachineOperand &DefOpnd : MI.defs()) {
+ if (DefOpnd.isReg()) {
+ unsigned Reg = DefOpnd.getReg();
+ if (TRI->isVGPR(*MRI, Reg))
+ processDef(DefOpnd);
+ }
+ }
+ }
}
}
+ if (!WWMs.empty()) {
+ // Synthesize liveness over WWM sections as required.
+ for (auto ThenDef : ThenDefs)
+ Modified |= processThenDef(ThenDef);
+ for (auto LoopExitDef : LoopExitDefs)
+ Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second);
+ for (auto LoopPhiDef : LoopPhiDefs)
+ Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second);
+ }
- // Compute the registers that reach MI.
- SparseBitVector<> Reachable;
+ WWMs.clear();
+ ThenDefs.clear();
+ LoopExitDefs.clear();
+ LoopPhiDefs.clear();
- for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE =
- MBB->rend(); II != IE; ++II) {
- addDefs(*II, Reachable);
- }
+ return Modified;
+}
- for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB),
- E = idf_end(MBB);
- I != E; ++I) {
- for (const MachineInstr &MI : **I) {
- addDefs(MI, Reachable);
+// During the function scan, process an operand that defines a VGPR.
+// This categorizes the register and puts it in the appropriate list for later
+// use when processing a WWM section.
+void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
+ unsigned Reg = DefOpnd.getReg();
+ // Get all the defining instructions. For convenience, make Defs[0] the def
+ // we are on now.
+ SmallVector<const MachineInstr *, 4> Defs;
+ Defs.push_back(DefOpnd.getParent());
+ for (auto &MI : MRI->def_instructions(Reg)) {
+ if (&MI != DefOpnd.getParent())
+ Defs.push_back(&MI);
+ }
+ // Check whether this def dominates all the others. If not, ignore this def.
+ // Either it is going to be processed when the scan encounters its other def
+ // that dominates all defs, or there is no def that dominates all others.
+ // The latter case is an eliminated phi from an if..else..endif or similar,
+ // which must be for uniform control flow so can be ignored.
+ // Because this pass runs shortly after PHIElimination, we assume that any
+ // multi-def register is a lowered phi, and thus has each def in a separate
+ // basic block.
+ for (unsigned I = 1; I != Defs.size(); ++I) {
+ if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent()))
+ return;
+ }
+ // Check for the case of an if..endif lowered phi: It has two defs, one
+ // dominates the other, and there is a single use in a successor of the
+ // dominant def.
+ // Later we will spot any WWM code inside
+ // the "then" clause and turn the second def into a partial def so its
+ // liveness goes through the WWM code in the "then" clause.
+ if (Defs.size() == 2) {
+ auto DomDefBlock = Defs[0]->getParent();
+ if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) {
+ auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
+ for (auto Succ : DomDefBlock->successors()) {
+ if (Succ == UseBlock) {
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
+ ThenDefs.push_back(&DefOpnd);
+ return;
+ }
+ }
}
}
-
- // find the intersection, and add implicit uses.
- LiveOut &= Reachable;
-
- bool Modified = false;
- for (unsigned Reg : LiveOut) {
- WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
- if (LIS) {
- // FIXME: is there a better way to update the live interval?
- LIS->removeInterval(Reg);
- LIS->createAndComputeVirtRegInterval(Reg);
+ // Check for the case of a non-lowered-phi register (single def) that exits
+ // a loop, that is, it has a use that is outside a loop that the def is
+ // inside. We find the outermost loop that the def is inside but a use is
+ // outside. Later we will spot any WWM code inside that loop and then make
+ // the def a partial def so its liveness goes round the loop and through the
+ // WWM code.
+ if (Defs.size() == 1) {
+ auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent());
+ if (!Loop)
+ return;
+ bool IsLoopExit = false;
+ for (auto &Use : MRI->use_instructions(Reg)) {
+ auto UseBlock = Use.getParent();
+ if (Loop->contains(UseBlock))
+ continue;
+ IsLoopExit = true;
+ while (auto Parent = Loop->getParentLoop()) {
+ if (Parent->contains(UseBlock))
+ break;
+ Loop = Parent;
+ }
}
- Modified = true;
+ if (!IsLoopExit)
+ return;
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
+ << " is a loop exit reg with loop header at "
+ << "bb." << Loop->getHeader()->getNumber() << "\n");
+ LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>(
+ &DefOpnd, Loop));
+ return;
}
-
- return Modified;
+ // Check for the case of a lowered single-preheader-loop phi, that is, a
+ // multi-def register where the dominating def is in the loop pre-header and
+ // all other defs are in backedges. Later we will spot any WWM code inside
+ // that loop and then make the backedge defs partial defs so the liveness
+ // goes through the WWM code.
+ // Note that we are ignoring multi-preheader loops on the basis that the
+ // structurizer does not allow that for non-uniform loops.
+ // There must be a single use in the loop header.
+ if (!MRI->hasOneUse(Reg))
+ return;
+ auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
+ auto Loop = LoopInfo->getLoopFor(UseBlock);
+ if (!Loop || Loop->getHeader() != UseBlock
+ || Loop->contains(Defs[0]->getParent())) {
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
+ << " is multi-def but single use not in loop header\n");
+ return;
+ }
+ for (unsigned I = 1; I != Defs.size(); ++I) {
+ if (!Loop->contains(Defs[I]->getParent()))
+ return;
+ }
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
+ << " is a loop phi reg with loop header at "
+ << "bb." << Loop->getHeader()->getNumber() << "\n");
+ LoopPhiDefs.push_back(
+ std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop));
}
-bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
- bool Modified = false;
-
- // This doesn't actually need LiveIntervals, but we can preserve them.
- LIS = getAnalysisIfAvailable<LiveIntervals>();
-
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
-
- TRI = &TII->getRegisterInfo();
- MRI = &MF.getRegInfo();
+// Process a then phi def: It has two defs, one dominates the other, and there
+// is a single use in a successor of the dominant def. Here we spot any WWM
+// code inside the "then" clause and turn the second def into a partial def so
+// its liveness goes through the WWM code in the "then" clause.
+bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
+ LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent());
+ if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Ignore if dominating def is undef.
+ LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n");
+ return false;
+ }
+ unsigned Reg = DefOpnd->getReg();
+ // Get the use block, which is the endif block.
+ auto UseBlock = MRI->use_instr_begin(Reg)->getParent();
+ // Check whether there is WWM code inside the then branch. The WWM code must
+ // be dominated by the if but not dominated by the endif.
+ bool ContainsWWM = false;
+ for (auto WWM : WWMs) {
+ if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent())
+ && !DomTree->dominates(UseBlock, WWM->getParent())) {
+ LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
+ ContainsWWM = true;
+ break;
+ }
+ }
+ if (!ContainsWWM)
+ return false;
+ // Get the other def.
+ MachineInstr *OtherDef = nullptr;
+ for (auto &MI : MRI->def_instructions(Reg)) {
+ if (&MI != DefOpnd->getParent())
+ OtherDef = &MI;
+ }
+ // Make it a partial def.
+ OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
+ LLVM_DEBUG(dbgs() << *OtherDef);
+ return true;
+}
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
- Modified |= runOnWWMInstruction(MI);
- }
+// Process a loop exit def, that is, a register with a single use in a loop
+// that has a use outside the loop. Here we spot any WWM code inside that loop
+// and then make the def a partial def so its liveness goes round the loop and
+// through the WWM code.
+bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
+ MachineLoop *Loop) {
+ LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
+ // Check whether there is WWM code inside the loop.
+ bool ContainsWWM = false;
+ for (auto WWM : WWMs) {
+ if (Loop->contains(WWM->getParent())) {
+ LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
+ ContainsWWM = true;
+ break;
}
}
+ if (!ContainsWWM)
+ return false;
+ unsigned Reg = DefOpnd->getReg();
+ // Add a new implicit_def in loop preheader(s).
+ for (auto Pred : Loop->getHeader()->predecessors()) {
+ if (!Loop->contains(Pred)) {
+ auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
+ LLVM_DEBUG(dbgs() << *ImplicitDef);
+ (void)ImplicitDef;
+ }
+ }
+ // Make the original def partial.
+ DefOpnd->getParent()->addOperand(MachineOperand::CreateReg(
+ Reg, false, /*isImp=*/true));
+ LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
+ return true;
+}
- return Modified;
+// Process a loop phi def, that is, a multi-def register where the dominating
+// def is in the loop pre-header and all other defs are in backedges. Here we
+// spot any WWM code inside that loop and then make the backedge defs partial
+// defs so the liveness goes through the WWM code.
+bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
+ MachineLoop *Loop) {
+ LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent());
+ // Check whether there is WWM code inside the loop.
+ bool ContainsWWM = false;
+ for (auto WWM : WWMs) {
+ if (Loop->contains(WWM->getParent())) {
+ LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
+ ContainsWWM = true;
+ break;
+ }
+ }
+ if (!ContainsWWM)
+ return false;
+ unsigned Reg = DefOpnd->getReg();
+ // Remove kill mark from uses.
+ for (auto &Use : MRI->use_operands(Reg))
+ Use.setIsKill(false);
+ // Make all defs except the dominating one partial defs.
+ SmallVector<MachineInstr *, 4> Defs;
+ for (auto &Def : MRI->def_instructions(Reg))
+ Defs.push_back(&Def);
+ for (auto Def : Defs) {
+ if (DefOpnd->getParent() == Def)
+ continue;
+ Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
+ LLVM_DEBUG(dbgs() << *Def);
+ }
+ return true;
}
+
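All three cases above rely on the same mechanism, visible in processThenDef, processLoopExitDef and processLoopPhiDef: attaching an implicit use of a register to one of its own defs turns that def into a partial def, so the register is treated as live across the preceding WWM code rather than freshly defined there. A minimal sketch of that trick in isolation (the helper is illustrative; the CreateReg/addOperand call is the same one the pass uses):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

// Sketch: make Def a partial def of Reg by adding an implicit use of Reg to
// the same instruction, forcing Reg's liveness through earlier WWM code.
static void makePartialDef(llvm::MachineInstr &Def, unsigned Reg) {
  Def.addOperand(llvm::MachineOperand::CreateReg(Reg, /*isDef=*/false,
                                                 /*isImp=*/true));
}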
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
new file mode 100644
index 000000000000..ee39eb04d831
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
@@ -0,0 +1,231 @@
+//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+/// The SIFixupVectorISel pass cleans up post-ISel vector issues.
+/// Currently this will convert GLOBAL_{LOAD|STORE}_*
+/// and GLOBAL_Atomic_* instructions into their _SADDR variants,
+/// feeding the sreg into the saddr field of the new instruction.
+/// We currently handle a REG_SEQUENCE feeding the vaddr
+/// and decompose it into a base and index.
+///
+/// Transform:
+/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
+/// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
+/// %24:vgpr_32, %19:sreg_64_xexec
+/// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
+/// %11:vreg_64 = COPY %16:vreg_64
+/// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
+/// Into:
+/// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
+/// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
+/// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec,16...
+///
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#define DEBUG_TYPE "si-fixup-vector-isel"
+
+using namespace llvm;
+
+static cl::opt<bool> EnableGlobalSGPRAddr(
+ "amdgpu-enable-global-sgpr-addr",
+ cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
+ cl::init(false));
+
+STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
+STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
+
+namespace {
+
+class SIFixupVectorISel : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixupVectorISel() : MachineFunctionPass(ID) {
+ initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
+ "SI Fixup Vector ISel", false, false)
+
+char SIFixupVectorISel::ID = 0;
+
+char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;
+
+FunctionPass *llvm::createSIFixupVectorISelPass() {
+ return new SIFixupVectorISel();
+}
+
+static bool findSRegBaseAndIndex(MachineOperand *Op,
+ unsigned &BaseReg,
+ unsigned &IndexReg,
+ MachineRegisterInfo &MRI,
+ const SIRegisterInfo *TRI) {
+ SmallVector<MachineOperand *, 8> Worklist;
+ Worklist.push_back(Op);
+ while (!Worklist.empty()) {
+ MachineOperand *WOp = Worklist.pop_back_val();
+ if (!WOp->isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(WOp->getReg()))
+ continue;
+ MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
+ switch (DefInst->getOpcode()) {
+ default:
+ continue;
+ case AMDGPU::COPY:
+ Worklist.push_back(&DefInst->getOperand(1));
+ break;
+ case AMDGPU::REG_SEQUENCE:
+ if (DefInst->getNumOperands() != 5)
+ continue;
+ Worklist.push_back(&DefInst->getOperand(1));
+ Worklist.push_back(&DefInst->getOperand(3));
+ break;
+ case AMDGPU::V_ADD_I32_e64:
+ // The V_ADD_* and its analogous V_ADDC_* are generated by
+ // a previous pass which lowered from an ADD_64_PSEUDO,
+ // which generates subregs to break up the 64-bit args.
+ if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
+ continue;
+ BaseReg = DefInst->getOperand(2).getReg();
+ if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
+ continue;
+ IndexReg = DefInst->getOperand(3).getReg();
+ // Chase the IndexReg.
+ MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
+ if (!MI || !MI->isCopy())
+ continue;
+ // Make sure the reg class is 64 bit for Index.
+ // If the Index register is a subreg, we want it to reference
+ // a 64 bit register which we will use as the Index reg.
+ const TargetRegisterClass *IdxRC, *BaseRC;
+ IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
+ if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
+ continue;
+ IndexReg = MI->getOperand(1).getReg();
+ // Chase the BaseReg.
+ MI = MRI.getUniqueVRegDef(BaseReg);
+ if (!MI || !MI->isCopy())
+ continue;
+ // Make sure the register class is 64 bit for Base.
+ BaseReg = MI->getOperand(1).getReg();
+ BaseRC = MRI.getRegClass(BaseReg);
+ if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
+ continue;
+ // Make sure Base is SReg and Index is VReg.
+ if (!TRI->isSGPRReg(MRI, BaseReg))
+ return false;
+ if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
+ return false;
+ // Clear any kill flags on the Index and Base regs; they are used later.
+ MRI.clearKillFlags(IndexReg);
+ MRI.clearKillFlags(BaseReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+// Identify Global LOAD|STORE/ATOMIC and try to convert to _SADDR.
+static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ MachineRegisterInfo &MRI,
+ const GCNSubtarget &ST,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI) {
+ if (!EnableGlobalSGPRAddr)
+ return false;
+ bool FuncModified = false;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+ int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
+ if (NewOpcd < 0)
+ continue;
+ // Update our statistics on opportunities seen.
+ ++NumSGPRGlobalOccurs;
+ LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
+ // Need a Base and Index or we can't transform to _SADDR.
+ unsigned BaseReg = 0;
+ unsigned IndexReg = 0;
+ MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
+ if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
+ continue;
+ ++NumSGPRGlobalSaddrs;
+ FuncModified = true;
+ // Create the new _SADDR Memory instruction.
+ bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
+ MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
+ MachineInstr *NewGlob = nullptr;
+ NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
+ if (HasVdst)
+ NewGlob->addOperand(MF, MI.getOperand(0));
+ NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
+ if (VData)
+ NewGlob->addOperand(MF, *VData);
+ NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
+ NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));
+
+ MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
+ // Atomics don't have a GLC, so omit the field if it is not there.
+ if (Glc)
+ NewGlob->addOperand(MF, *Glc);
+ NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
+ // _D16 variants have a vdst_in operand; copy it in.
+ MachineOperand *VDstInOp = TII->getNamedOperand(MI,
+ AMDGPU::OpName::vdst_in);
+ if (VDstInOp)
+ NewGlob->addOperand(MF, *VDstInOp);
+ NewGlob->copyImplicitOps(MF, MI);
+ NewGlob->cloneMemRefs(MF, MI);
+ // Remove the old Global Memop instruction.
+ MI.eraseFromParent();
+ LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
+ }
+ return FuncModified;
+}
+
+bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ bool FuncModified = false;
+ for (MachineBasicBlock &MBB : MF) {
+ // Clean up missed Saddr opportunities from ISel.
+ FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
+ }
+ return FuncModified;
+}
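The conversion above is doubly gated: the whole pass is a no-op unless the amdgpu-enable-global-sgpr-addr option declared at the top of the file is set, and an individual instruction is rewritten only when a _SADDR sibling opcode actually exists for it. A minimal sketch of that per-instruction gate (the helper name is illustrative; getGlobalSaddrOp is the same generated lookup fixupGlobalSaddr uses, assuming the includes of the file above):

// Sketch: an opcode qualifies for the _SADDR rewrite only when the generated
// mapping table knows a _SADDR form for it; a negative result means "none".
static bool hasSaddrVariant(unsigned Opcode) {
  return llvm::AMDGPU::getGlobalSaddrOp(Opcode) >= 0;
}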
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 338cabcb906b..f4e866958369 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -35,13 +35,16 @@ struct FoldCandidate {
uint64_t ImmToFold;
int FrameIndexToFold;
};
+ int ShrinkOpcode;
unsigned char UseOpNo;
MachineOperand::MachineOperandType Kind;
bool Commuted;
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
- bool Commuted_ = false) :
- UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
+ bool Commuted_ = false,
+ int ShrinkOp = -1) :
+ UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+ Kind(FoldOp->getType()),
Commuted(Commuted_) {
if (FoldOp->isImm()) {
ImmToFold = FoldOp->getImm();
@@ -68,6 +71,14 @@ struct FoldCandidate {
bool isCommuted() const {
return Commuted;
}
+
+ bool needsShrink() const {
+ return ShrinkOpcode != -1;
+ }
+
+ int getShrinkOpcode() const {
+ return ShrinkOpcode;
+ }
};
class SIFoldOperands : public MachineFunctionPass {
@@ -154,6 +165,7 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
}
static bool updateOperand(FoldCandidate &Fold,
+ const SIInstrInfo &TII,
const TargetRegisterInfo &TRI) {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
@@ -189,10 +201,49 @@ static bool updateOperand(FoldCandidate &Fold,
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
}
}
+
+ if (Fold.needsShrink()) {
+ MachineBasicBlock *MBB = MI->getParent();
+ auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+ if (Liveness != MachineBasicBlock::LQR_Dead)
+ return false;
+
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ int Op32 = Fold.getShrinkOpcode();
+ MachineOperand &Dst0 = MI->getOperand(0);
+ MachineOperand &Dst1 = MI->getOperand(1);
+ assert(Dst0.isDef() && Dst1.isDef());
+
+ bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+
+ const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+ unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
+ const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
+ unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+
+ MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+
+ if (HaveNonDbgCarryUse) {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+ .addReg(AMDGPU::VCC, RegState::Kill);
+ }
+
+ // Keep the old instruction around to avoid breaking iterators, but
+ // replace the outputs with dummy registers.
+ Dst0.setReg(NewReg0);
+ Dst1.setReg(NewReg1);
+
+ if (Fold.isCommuted())
+ TII.commuteInstruction(*Inst32, false);
+ return true;
+ }
+
Old.ChangeToImmediate(Fold.ImmToFold);
return true;
}
+ assert(!Fold.needsShrink() && "not handled");
+
if (Fold.isFI()) {
Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
return true;
@@ -261,6 +312,8 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if (isUseMIInFoldList(FoldList, MI))
return false;
+ unsigned CommuteOpNo = OpNo;
+
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
@@ -269,11 +322,12 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if (CanCommute) {
if (CommuteIdx0 == OpNo)
- OpNo = CommuteIdx1;
+ CommuteOpNo = CommuteIdx1;
else if (CommuteIdx1 == OpNo)
- OpNo = CommuteIdx0;
+ CommuteOpNo = CommuteIdx0;
}
+
// One of the operands might be an Imm operand, and OpNo may refer to it after
// the call to commuteInstruction() below. Such situations are avoided
// here explicitly, as OpNo must be a register operand to be a candidate
@@ -286,12 +340,34 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
!TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
return false;
- if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
+ if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
+ if ((Opc == AMDGPU::V_ADD_I32_e64 ||
+ Opc == AMDGPU::V_SUB_I32_e64 ||
+ Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
+ OpToFold->isImm()) {
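+ // e.g. (illustrative) v_add_i32_e64 %vdst, %carry, %vgpr, <literal>: the
+ // VOP3 encoding cannot take the literal here, but the e32 form can, so
+ // record a shrink candidate and let updateOperand() verify VCC is free.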
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ // Verify the other operand is a VGPR, otherwise we would violate the
+ // constant bus restriction.
+ unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
+ MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+ if (!OtherOp.isReg() ||
+ !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
+ return false;
+
+ assert(MI->getOperand(1).isDef());
+
+ int Op32 = AMDGPU::getVOPe32(Opc);
+ FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
+ Op32));
+ return true;
+ }
+
TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
return false;
}
- FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
+ FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
return true;
}
@@ -362,8 +438,6 @@ void SIFoldOperands::foldOperand(
bool FoldingImm = OpToFold.isImm();
- // In order to fold immediates into copies, we need to change the
- // copy to a MOV.
if (FoldingImm && UseMI->isCopy()) {
unsigned DestReg = UseMI->getOperand(0).getReg();
const TargetRegisterClass *DestRC
@@ -371,6 +445,31 @@ void SIFoldOperands::foldOperand(
MRI->getRegClass(DestReg) :
TRI->getPhysRegClass(DestReg);
+ unsigned SrcReg = UseMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
+ if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
+ MachineRegisterInfo::use_iterator NextUse;
+ SmallVector<FoldCandidate, 4> CopyUses;
+ for (MachineRegisterInfo::use_iterator
+ Use = MRI->use_begin(DestReg), E = MRI->use_end();
+ Use != E; Use = NextUse) {
+ NextUse = std::next(Use);
+ FoldCandidate FC = FoldCandidate(Use->getParent(),
+ Use.getOperandNo(), &UseMI->getOperand(1));
+ CopyUses.push_back(FC);
+ }
+ for (auto & F : CopyUses) {
+ foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
+ FoldList, CopiesToReplace);
+ }
+ }
+ }
+
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV.
+
unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
return;
@@ -378,6 +477,20 @@ void SIFoldOperands::foldOperand(
UseMI->setDesc(TII->get(MovOp));
CopiesToReplace.push_back(UseMI);
} else {
+ if (UseMI->isCopy() && OpToFold.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
+ TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
+ TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
+ TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
+ !UseMI->getOperand(1).getSubReg()) {
+ UseMI->getOperand(1).setReg(OpToFold.getReg());
+ UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
+ UseMI->getOperand(1).setIsKill(false);
+ CopiesToReplace.push_back(UseMI);
+ OpToFold.setIsKill(false);
+ return;
+ }
+
const MCInstrDesc &UseDesc = UseMI->getDesc();
// Don't fold into target independent nodes. Target independent opcodes
@@ -550,6 +663,19 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
if (!Src0->isImm() && !Src1->isImm())
return false;
+ if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
+ if (Src0->isImm() && Src0->getImm() == 0) {
+ // v_lshl_or_b32 0, X, Y -> copy Y
+ // v_lshl_or_b32 0, X, K -> v_mov_b32 K
+ bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
+ MI->RemoveOperand(Src1Idx);
+ MI->RemoveOperand(Src0Idx);
+
+ MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
+ return true;
+ }
+ }
+
// and k0, k1 -> v_mov_b32 (k0 & k1)
// or k0, k1 -> v_mov_b32 (k0 | k1)
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
@@ -728,13 +854,17 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
}
} else {
// Folding register.
+ SmallVector <MachineRegisterInfo::use_iterator, 4> UsesToProcess;
for (MachineRegisterInfo::use_iterator
Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
Use != E; ++Use) {
- MachineInstr *UseMI = Use->getParent();
+ UsesToProcess.push_back(Use);
+ }
+ for (auto U : UsesToProcess) {
+ MachineInstr *UseMI = U->getParent();
- foldOperand(OpToFold, UseMI, Use.getOperandNo(),
- FoldList, CopiesToReplace);
+ foldOperand(OpToFold, UseMI, U.getOperandNo(),
+ FoldList, CopiesToReplace);
}
}
@@ -744,7 +874,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
Copy->addImplicitDefUseOperands(*MF);
for (FoldCandidate &Fold : FoldList) {
- if (updateOperand(Fold, *TRI)) {
+ if (updateOperand(Fold, *TII, *TRI)) {
// Clear kill flags.
if (Fold.isReg()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
@@ -981,9 +1111,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// omod is ignored by hardware if IEEE bit is enabled. omod also does not
// correctly handle signed zeros.
//
- // TODO: Check nsz on instructions when fast math flags are preserved to MI
- // level.
- bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
+ bool IsIEEEMode = ST->enableIEEEBit(MF);
+ bool HasNSZ = MFI->hasNoSignedZerosFPMath();
for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineBasicBlock::iterator I, Next;
@@ -994,7 +1123,10 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
tryFoldInst(TII, &MI);
if (!TII->isFoldableCopy(MI)) {
- if (IsIEEEMode || !tryFoldOMod(MI))
+ // TODO: Omod might be OK if there is NSZ only on the source
+ // instruction, and not the omod multiply.
+ if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
+ !tryFoldOMod(MI))
tryFoldClamp(MI);
continue;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index cd14239de822..aa976d5141f8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -168,16 +168,15 @@ void SIFormMemoryClauses::forAllLanes(unsigned Reg, LaneBitmask LaneMask,
CoveringSubregs.push_back(Idx);
}
- llvm::sort(CoveringSubregs.begin(), CoveringSubregs.end(),
- [this](unsigned A, unsigned B) {
- LaneBitmask MaskA = TRI->getSubRegIndexLaneMask(A);
- LaneBitmask MaskB = TRI->getSubRegIndexLaneMask(B);
- unsigned NA = MaskA.getNumLanes();
- unsigned NB = MaskB.getNumLanes();
- if (NA != NB)
- return NA > NB;
- return MaskA.getHighestLane() > MaskB.getHighestLane();
- });
+ llvm::sort(CoveringSubregs, [this](unsigned A, unsigned B) {
+ LaneBitmask MaskA = TRI->getSubRegIndexLaneMask(A);
+ LaneBitmask MaskB = TRI->getSubRegIndexLaneMask(B);
+ unsigned NA = MaskA.getNumLanes();
+ unsigned NB = MaskB.getNumLanes();
+ if (NA != NB)
+ return NA > NB;
+ return MaskA.getHighestLane() > MaskB.getHighestLane();
+ });
for (unsigned Idx : CoveringSubregs) {
LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(Idx);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index ac0ef90f25a4..e4633c88e18f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -289,7 +289,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
- if (ST.isAmdCodeObjectV2(F)) {
+ if (ST.isAmdHsaOrMesa(F)) {
PreloadedPrivateBufferReg = MFI->getPreloadedReg(
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
}
@@ -308,7 +308,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
- assert(ST.isAmdCodeObjectV2(F) || ST.isMesaGfxShader(F));
+ assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
MRI.addLiveIn(PreloadedPrivateBufferReg);
MBB.addLiveIn(PreloadedPrivateBufferReg);
}
@@ -333,7 +333,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
bool CopyBuffer = ResourceRegUsed &&
PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
- ST.isAmdCodeObjectV2(F) &&
+ ST.isAmdHsaOrMesa(F) &&
ScratchRsrcReg != PreloadedPrivateBufferReg;
// This needs to be careful of the copying order to avoid overwriting one of
@@ -433,7 +433,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
}
if (ST.isMesaGfxShader(Fn)
|| (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
- assert(!ST.isAmdCodeObjectV2(Fn));
+ assert(!ST.isAmdHsaOrMesa(Fn));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 25007861fd15..0ba921647097 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#ifdef _MSC_VER
+#if defined(_MSC_VER) || defined(__MINGW32__)
// Provide M_PI.
#define _USE_MATH_DEFINES
#endif
@@ -156,12 +156,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::LOAD, MVT::v32i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v32i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
@@ -207,11 +209,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
@@ -232,6 +237,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ADDCARRY, MVT::i32, Legal);
setOperationAction(ISD::SUBCARRY, MVT::i32, Legal);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+
#if 0
setOperationAction(ISD::ADDCARRY, MVT::i64, Legal);
setOperationAction(ISD::SUBCARRY, MVT::i64, Legal);
@@ -240,7 +249,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
- MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16 }) {
+ MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v32i32 }) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -339,6 +348,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::FLOG, MVT::f16, Custom);
+ setOperationAction(ISD::FEXP, MVT::f16, Custom);
setOperationAction(ISD::FLOG10, MVT::f16, Custom);
}
@@ -375,8 +385,20 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasBFE())
setHasExtractBitsInsn(true);
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Custom);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Custom);
+
+
+ // These are really only legal for ieee_mode functions. We should be avoiding
+ // them for functions that don't have ieee_mode enabled, so just say they are
+ // legal.
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
@@ -465,8 +487,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// F16 - VOP2 Actions.
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
+
setOperationAction(ISD::FDIV, MVT::f16, Custom);
// F16 - VOP3 Actions.
@@ -549,6 +570,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// This isn't really legal, but this avoids the legalizer unrolling it (and
// allows matching fneg (fabs x) patterns)
setOperationAction(ISD::FABS, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f16, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f16, Legal);
+
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::v4f16, Custom);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::v4f16, Custom);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f16, Expand);
}
if (Subtarget->hasVOP3PInsts()) {
@@ -566,8 +598,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FADD, MVT::v2f16, Legal);
setOperationAction(ISD::FMUL, MVT::v2f16, Legal);
setOperationAction(ISD::FMA, MVT::v2f16, Legal);
- setOperationAction(ISD::FMINNUM, MVT::v2f16, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::v2f16, Legal);
+
setOperationAction(ISD::FCANONICALIZE, MVT::v2f16, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
@@ -587,9 +621,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FADD, MVT::v4f16, Custom);
setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
+
+ setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom);
+
setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom);
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v4f16, Custom);
+ setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
setOperationAction(ISD::SELECT, MVT::v4i16, Custom);
setOperationAction(ISD::SELECT, MVT::v4f16, Custom);
}
@@ -623,6 +663,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);
+ setTargetDAGCombine(ISD::FMINNUM_IEEE);
+ setTargetDAGCombine(ISD::FMAXNUM_IEEE);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SMIN);
setTargetDAGCombine(ISD::SMAX);
@@ -638,7 +680,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
// All memory operations. Some folding on the pointer operand is done to help
// matching the constant offsets in the addressing modes.
@@ -707,9 +749,7 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
if (Size == 64)
return MVT::i32;
- if (Size == 16 &&
- Subtarget->has16BitInsts() &&
- isPowerOf2_32(VT.getVectorNumElements()))
+ if (Size == 16 && Subtarget->has16BitInsts())
return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
}
@@ -730,9 +770,8 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
if (Size == 64)
return 2 * NumElts;
- // FIXME: Fails to break down as we want with v3.
- if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts))
- return VT.getVectorNumElements() / 2;
+ if (Size == 16 && Subtarget->has16BitInsts())
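+ // e.g. (illustrative) a v3i16 argument now takes (3 + 1) / 2 = 2 v2i16
+ // registers instead of falling back to the default breakdown.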
+ return (VT.getVectorNumElements() + 1) / 2;
}
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
@@ -763,10 +802,10 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
// FIXME: We should fix the ABI to be the same on targets without 16-bit
// support, but unless we can properly handle 3-vectors, it will still be
// inconsistent.
- if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts)) {
+ if (Size == 16 && Subtarget->has16BitInsts()) {
RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
IntermediateVT = RegisterVT;
- NumIntermediates = NumElts / 2;
+ NumIntermediates = (NumElts + 1) / 2;
return NumIntermediates;
}
}
@@ -775,6 +814,47 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}
+static MVT memVTFromAggregate(Type *Ty) {
+ // Only limited forms of aggregate type are currently expected.
+ assert(Ty->isStructTy() && "Expected struct type");
+
+
+ Type *ElementType = nullptr;
+ unsigned NumElts;
+ if (Ty->getContainedType(0)->isVectorTy()) {
+ VectorType *VecComponent = cast<VectorType>(Ty->getContainedType(0));
+ ElementType = VecComponent->getElementType();
+ NumElts = VecComponent->getNumElements();
+ } else {
+ ElementType = Ty->getContainedType(0);
+ NumElts = 1;
+ }
+
+ assert((Ty->getContainedType(1) && Ty->getContainedType(1)->isIntegerTy(32)) && "Expected int32 type");
+
+ // Calculate the size of the memVT type from the aggregate
+ unsigned Pow2Elts = 0;
+ unsigned ElementSize;
+ switch (ElementType->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ case Type::IntegerTyID:
+ ElementSize = cast<IntegerType>(ElementType)->getBitWidth();
+ break;
+ case Type::HalfTyID:
+ ElementSize = 16;
+ break;
+ case Type::FloatTyID:
+ ElementSize = 32;
+ break;
+ }
+ unsigned AdditionalElts = ElementSize == 16 ? 2 : 1;
+ Pow2Elts = 1 << Log2_32_Ceil(NumElts + AdditionalElts);
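+ // e.g. (illustrative) a {<4 x float>, i32} return gives NumElts = 4,
+ // ElementSize = 32, AdditionalElts = 1, so Pow2Elts = 8 and the memVT is
+ // v8f32; a {<4 x half>, i32} return rounds 4 + 2 up to v8f16.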
+
+ return MVT::getVectorVT(MVT::getVT(ElementType, false),
+ Pow2Elts);
+}
+
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
@@ -802,7 +882,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MODereferenceable;
if (Attr.hasFnAttribute(Attribute::ReadOnly)) {
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(CI.getType());
+ Info.memVT = MVT::getVT(CI.getType(), true);
+ if (Info.memVT == MVT::Other) {
+ // Some intrinsics return an aggregate type; special-case these to work out
+ // the correct memVT.
+ Info.memVT = memVTFromAggregate(CI.getType());
+ }
Info.flags |= MachineMemOperand::MOLoad;
} else if (Attr.hasFnAttribute(Attribute::WriteOnly)) {
Info.opc = ISD::INTRINSIC_VOID;
@@ -941,11 +1026,11 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.BaseGV)
return false;
- if (AS == AMDGPUASI.GLOBAL_ADDRESS)
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS)
return isLegalGlobalAddressingMode(AM);
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
@@ -983,10 +1068,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
- } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
return isLegalMUBUFAddressingMode(AM);
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS ||
- AS == AMDGPUASI.REGION_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
+ AS == AMDGPUAS::REGION_ADDRESS) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
@@ -1001,8 +1086,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
return false;
- } else if (AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) {
+ } else if (AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::UNKNOWN_ADDRESS_SPACE) {
// For an unknown address space, this usually means that this is for some
// reason being used for pure arithmetic, and not based on some addressing
// computation. We don't have instructions that compute pointers with any
@@ -1016,12 +1101,12 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const {
- if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) {
return (MemVT.getSizeInBits() <= 4 * 32);
- } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize();
return (MemVT.getSizeInBits() <= MaxPrivateBits);
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
return (MemVT.getSizeInBits() <= 2 * 32);
}
return true;
@@ -1043,8 +1128,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return false;
}
- if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS ||
- AddrSpace == AMDGPUASI.REGION_ADDRESS) {
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
@@ -1059,17 +1144,21 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// will access scratch. If we had access to the IR function, then we
// could determine if any private memory was used in the function.
if (!Subtarget->hasUnalignedScratchAccess() &&
- (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS ||
- AddrSpace == AMDGPUASI.FLAT_ADDRESS)) {
- return false;
+ (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
+ AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
+ bool AlignedBy4 = Align >= 4;
+ if (IsFast)
+ *IsFast = AlignedBy4;
+
+ return AlignedBy4;
}
if (Subtarget->hasUnalignedBufferAccess()) {
// If we have an uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
- *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS ||
- AddrSpace == AMDGPUASI.CONSTANT_ADDRESS_32BIT) ?
+ *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
(Align % 4 == 0) : true;
}
@@ -1109,17 +1198,15 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
return MVT::Other;
}
-static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
- return AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
+static bool isFlatGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
- return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) &&
- isFlatGlobalAddrSpace(DestAS, AMDGPUASI);
+ return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
}
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
@@ -1133,7 +1220,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
// Flat -> global is no-op
- if (SrcAS == AMDGPUASI.FLAT_ADDRESS)
+ if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
return true;
return isNoopAddrSpaceCast(SrcAS, DestAS);
@@ -1146,7 +1233,7 @@ bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
}
TargetLoweringBase::LegalizeTypeAction
-SITargetLowering::getPreferredVectorAction(EVT VT) const {
+SITargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
return TypeSplitVector;
@@ -1200,7 +1287,7 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
= Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
+ MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
@@ -1240,7 +1327,7 @@ SDValue SITargetLowering::lowerKernargMemParameter(
uint64_t Offset, unsigned Align, bool Signed,
const ISD::InputArg *Arg) const {
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
// Try to avoid using an extload by loading earlier than the argument address,
@@ -1349,7 +1436,8 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
const ISD::InputArg *Arg = &Ins[I];
- assert(!Arg->VT.isVector() && "vector type argument should have been split");
+ assert((!Arg->VT.isVector() || Arg->VT.getScalarSizeInBits() == 16) &&
+ "vector type argument should have been split");
// First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS &&
@@ -1642,7 +1730,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (ST.isAmdCodeObjectV2(MF.getFunction())) {
+ if (ST.isAmdHsaOrMesa(MF.getFunction())) {
if (RequiresStackAccess) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -1951,29 +2039,6 @@ SDValue SITargetLowering::LowerFormalArguments(
llvm_unreachable("Unknown loc info!");
}
- if (IsShader && Arg.VT.isVector()) {
- // Build a vector from the registers
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- SmallVector<SDValue, 4> Regs;
- Regs.push_back(Val);
- for (unsigned j = 1; j != NumElements; ++j) {
- Reg = ArgLocs[ArgIdx++].getLocReg();
- Reg = MF.addLiveIn(Reg, RC);
-
- SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- Regs.push_back(Copy);
- }
-
- // Fill up the missing vector elements
- NumElements = Arg.VT.getVectorNumElements() - NumElements;
- Regs.append(NumElements, DAG.getUNDEF(VT));
-
- InVals.push_back(DAG.getBuildVector(Arg.VT, DL, Regs));
- continue;
- }
-
InVals.push_back(Val);
}
@@ -2037,48 +2102,19 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsShader = AMDGPU::isShader(CallConv);
- Info->setIfReturnsVoid(Outs.size() == 0);
+ Info->setIfReturnsVoid(Outs.empty());
bool IsWaveEnd = Info->returnsVoid() && IsShader;
- SmallVector<ISD::OutputArg, 48> Splits;
- SmallVector<SDValue, 48> SplitVals;
-
- // Split vectors into their elements.
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- const ISD::OutputArg &Out = Outs[i];
-
- if (IsShader && Out.VT.isVector()) {
- MVT VT = Out.VT.getVectorElementType();
- ISD::OutputArg NewOut = Out;
- NewOut.Flags.setSplit();
- NewOut.VT = VT;
-
- // We want the original number of vector elements here, e.g.
- // three or five, not four or eight.
- unsigned NumElements = Out.ArgVT.getVectorNumElements();
-
- for (unsigned j = 0; j != NumElements; ++j) {
- SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, OutVals[i],
- DAG.getConstant(j, DL, MVT::i32));
- SplitVals.push_back(Elem);
- Splits.push_back(NewOut);
- NewOut.PartOffset += NewOut.VT.getStoreSize();
- }
- } else {
- SplitVals.push_back(OutVals[i]);
- Splits.push_back(Out);
- }
- }
-
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 48> RVLocs;
+ SmallVector<ISD::OutputArg, 48> Splits;
// CCState - Info about the registers and stack slots.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze outgoing return values.
- CCInfo.AnalyzeReturn(Splits, CCAssignFnForReturn(CallConv, isVarArg));
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 48> RetOps;
@@ -2103,14 +2139,12 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
// Copy the result values into the output registers.
- for (unsigned i = 0, realRVLocIdx = 0;
- i != RVLocs.size();
- ++i, ++realRVLocIdx) {
- CCValAssign &VA = RVLocs[i];
+ for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
+ ++I, ++RealRVLocIdx) {
+ CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
// TODO: Partially return in registers if return values don't fit.
-
- SDValue Arg = SplitVals[realRVLocIdx];
+ SDValue Arg = OutVals[RealRVLocIdx];
// Copied from other backends.
switch (VA.getLocInfo()) {
@@ -2225,11 +2259,11 @@ SDValue SITargetLowering::LowerCallResult(
// from the explicit user arguments present in the IR.
void SITargetLowering::passSpecialInputs(
CallLoweringInfo &CLI,
+ CCState &CCInfo,
const SIMachineFunctionInfo &Info,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains,
- SDValue Chain,
- SDValue StackPtr) const {
+ SDValue Chain) const {
// If we don't have a call site, this was a call inserted by
// legalization. These can never use special inputs.
if (!CLI.CS)
@@ -2297,9 +2331,9 @@ void SITargetLowering::passSpecialInputs(
if (OutgoingArg->isRegister()) {
RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
} else {
- SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, StackPtr,
- InputReg,
- OutgoingArg->getStackOffset());
+ unsigned SpecialArgOffset = CCInfo.AllocateStack(ArgVT.getStoreSize(), 4);
+ SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
+ SpecialArgOffset);
MemOpChains.push_back(ArgStore);
}
}
@@ -2424,6 +2458,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
"unsupported call to variadic function ");
}
+ if (!CLI.CS.getInstruction())
+ report_fatal_error("unsupported libcall legalization");
+
if (!CLI.CS.getCalledFunction()) {
return lowerUnhandledCall(CLI, InVals,
"unsupported indirect call to function ");
@@ -2442,8 +2479,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// The first 4 bytes are reserved for the callee's emergency stack slot.
- const unsigned CalleeUsableStackOffset = 4;
-
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
@@ -2463,25 +2498,16 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
++NumTailCalls;
}
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // FIXME: Remove this hack for function pointer types after removing
- // support of old address space mapping. In the new address space
- // mapping the pointer in default address space is 64 bit, therefore
- // does not need this hack.
- if (Callee.getValueType() == MVT::i32) {
- const GlobalValue *GV = GA->getGlobal();
- Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(), false,
- GA->getTargetFlags());
- }
- }
- assert(Callee.getValueType() == MVT::i64);
-
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
+
+ // The first 4 bytes are reserved for the callee's emergency stack slot.
+ CCInfo.AllocateStack(4, 4);
+
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
// Get a count of how many bytes are to be pushed on the stack.
@@ -2529,10 +2555,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
}
- // Stack pointer relative accesses are done by changing the offset SGPR. This
- // is just the VGPR offset component.
- SDValue StackPtr = DAG.getConstant(CalleeUsableStackOffset, DL, MVT::i32);
-
SmallVector<SDValue, 8> MemOpChains;
MVT PtrVT = MVT::i32;
@@ -2576,18 +2598,22 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset;
- SDValue PtrOff = DAG.getObjectPtrOffset(DL, StackPtr, Offset);
+ SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT);
+ unsigned Align = 0;
if (IsTailCall) {
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
unsigned OpSize = Flags.isByVal() ?
Flags.getByValSize() : VA.getValVT().getStoreSize();
+ // FIXME: We can have better than the minimum byval required alignment.
+ Align = Flags.isByVal() ? Flags.getByValAlign() :
+ MinAlign(Subtarget->getStackAlignment(), Offset);
+
Offset = Offset + FPDiff;
int FI = MFI.CreateFixedObject(OpSize, Offset, true);
- DstAddr = DAG.getObjectPtrOffset(DL, DAG.getFrameIndex(FI, PtrVT),
- StackPtr);
+ DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
// Make sure any stack arguments overlapping with where we're storing
@@ -2601,6 +2627,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
} else {
DstAddr = PtrOff;
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
+ Align = MinAlign(Subtarget->getStackAlignment(), LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
@@ -2611,18 +2638,18 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
/*isVol = */ false, /*AlwaysInline = */ true,
/*isTailCall = */ false, DstInfo,
MachinePointerInfo(UndefValue::get(Type::getInt8PtrTy(
- *DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS))));
+ *DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS))));
MemOpChains.push_back(Cpy);
} else {
- SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
+ SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Align);
MemOpChains.push_back(Store);
}
}
}
// Copy special input registers after user input arguments.
- passSpecialInputs(CLI, *Info, RegsToPass, MemOpChains, Chain, StackPtr);
+ passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
@@ -3460,7 +3487,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
MIB.add(MI.getOperand(I));
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return BB;
}
@@ -3628,7 +3655,11 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerDEBUGTRAP(Op, DAG);
case ISD::FABS:
case ISD::FNEG:
+ case ISD::FCANONICALIZE:
return splitUnaryVectorOp(Op, DAG);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ return lowerFMINNUM_FMAXNUM(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -3639,10 +3670,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
case ISD::FADD:
case ISD::FMUL:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
return splitBinaryVectorOp(Op, DAG);
}
return SDValue();
@@ -3678,18 +3709,9 @@ static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT,
SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
MemSDNode *M,
SelectionDAG &DAG,
+ ArrayRef<SDValue> Ops,
bool IsIntrinsic) const {
SDLoc DL(M);
- SmallVector<SDValue, 10> Ops;
- Ops.reserve(M->getNumOperands());
-
- Ops.push_back(M->getOperand(0));
- if (IsIntrinsic)
- Ops.push_back(DAG.getConstant(Opcode, DL, MVT::i32));
-
- // Skip 1, as it is the intrinsic ID.
- for (unsigned I = 2, E = M->getNumOperands(); I != E; ++I)
- Ops.push_back(M->getOperand(I));
bool Unpacked = Subtarget->hasUnpackedD16VMem();
EVT LoadVT = M->getValueType(0);
@@ -3717,6 +3739,69 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL);
}
+static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
+ SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
+ if (!CD)
+ return DAG.getUNDEF(VT);
+
+ int CondCode = CD->getSExtValue();
+ if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
+ CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
+ return DAG.getUNDEF(VT);
+
+ ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
+
+
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ SDLoc DL(N);
+
+ EVT CmpVT = LHS.getValueType();
+ if (CmpVT == MVT::i16 && !TLI.isTypeLegal(MVT::i16)) {
+ unsigned PromoteOp = ICmpInst::isSigned(IcInput) ?
+ ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ LHS = DAG.getNode(PromoteOp, DL, MVT::i32, LHS);
+ RHS = DAG.getNode(PromoteOp, DL, MVT::i32, RHS);
+ }
+
+ ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
+
+ return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS,
+ DAG.getCondCode(CCOpcode));
+}
+
+static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
+ SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
+ if (!CD)
+ return DAG.getUNDEF(VT);
+
+ int CondCode = CD->getSExtValue();
+ if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
+ CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE) {
+ return DAG.getUNDEF(VT);
+ }
+
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ EVT CmpVT = Src0.getValueType();
+ SDLoc SL(N);
+
+ if (CmpVT == MVT::f16 && !TLI.isTypeLegal(CmpVT)) {
+ Src0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
+ Src1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
+ }
+
+ FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
+ ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
+ return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0,
+ Src1, DAG.getCondCode(CCOpcode));
+}
+
void SITargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -3761,8 +3846,13 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
else
Opcode = AMDGPUISD::CVT_PK_U16_U32;
- SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
- Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
+ EVT VT = N->getValueType(0);
+ if (isTypeLegal(VT))
+ Results.push_back(DAG.getNode(Opcode, SL, VT, Src0, Src1));
+ else {
+ SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
+ }
return;
}
}
@@ -3895,15 +3985,15 @@ void SITargetLowering::createDebuggerPrologueStackObjects(
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
- return (GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
- return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
@@ -4038,6 +4128,23 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
+SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+
+ // FIXME: Assert during selection that this is only selected for
+ // ieee_mode. Currently a combine can produce the ieee version for non-ieee
+ // mode functions, but this happens to be OK since it's only done in cases
+ // where it is known there are no sNaNs.
+ if (IsIEEEMode)
+ return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
+
+ if (VT == MVT::v4f16)
+ return splitBinaryVectorOp(Op, DAG);
+ return Op;
+}
+
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
@@ -4091,10 +4198,10 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
// FIXME: Use inline constants (src_{shared, private}_base) instead.
if (Subtarget->hasApertureRegs()) {
- unsigned Offset = AS == AMDGPUASI.LOCAL_ADDRESS ?
+ unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
- unsigned WidthM1 = AS == AMDGPUASI.LOCAL_ADDRESS ?
+ unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
unsigned Encoding =
@@ -4119,7 +4226,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
- uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
+ uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
SDValue Ptr = DAG.getObjectPtrOffset(DL, QueuePtr, StructOffset);
@@ -4127,7 +4234,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available and how do we get it?
Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()),
- AMDGPUASI.CONSTANT_ADDRESS));
+ AMDGPUAS::CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(V, StructOffset);
return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
@@ -4148,11 +4255,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
// flat -> local/private
- if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
+ if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
unsigned DestAS = ASC->getDestAddressSpace();
- if (DestAS == AMDGPUASI.LOCAL_ADDRESS ||
- DestAS == AMDGPUASI.PRIVATE_ADDRESS) {
+ if (DestAS == AMDGPUAS::LOCAL_ADDRESS ||
+ DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned NullVal = TM.getNullPointerValue(DestAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
@@ -4164,11 +4271,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
}
// local/private -> flat
- if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
+ if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
unsigned SrcAS = ASC->getSrcAddressSpace();
- if (SrcAS == AMDGPUASI.LOCAL_ADDRESS ||
- SrcAS == AMDGPUASI.PRIVATE_ADDRESS) {
+ if (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+ SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned NullVal = TM.getNullPointerValue(SrcAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
@@ -4335,30 +4442,39 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
}
assert(VT == MVT::v2f16 || VT == MVT::v2i16);
+ assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
- Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
- Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
+ // Avoid adding defined bits with the zero_extend.
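+ // e.g. (illustrative) (build_vector x, undef) becomes a bitcast of
+ // (any_extend x), leaving the high 16 bits undefined instead of forcing
+ // them to zero with a zero_extend.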
+ if (Hi.isUndef()) {
+ Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
+ SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
+ return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
+ }
- Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
DAG.getConstant(16, SL, MVT::i32));
+ if (Lo.isUndef())
+ return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
- SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
+ Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
+ SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
return DAG.getNode(ISD::BITCAST, SL, VT, Or);
}
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
- return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitGOTReloc(GA->getGlobal());
}
@@ -4409,18 +4525,15 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSD->getGlobal();
-
- if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
- GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT &&
- GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS &&
- // FIXME: It isn't correct to rely on the type of the pointer. This should
- // be removed when address space 0 is 64-bit.
- !GV->getType()->getElementType()->isFunctionTy())
+ if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
+ GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
SDLoc DL(GSD);
EVT PtrVT = Op.getValueType();
+ // FIXME: Should not make address space based decisions here.
if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
else if (shouldEmitPCReloc(GV))
@@ -4431,11 +4544,11 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SIInstrInfo::MO_GOTPCREL32);
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
- // FIXME: Use a PseudoSourceValue once those can be assigned an address space.
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getGOT(DAG.getMachineFunction());
return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Align,
MachineMemOperand::MODereferenceable |
@@ -4547,11 +4660,115 @@ static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
return Value == 0;
}
+// Reconstruct the required return value for an image load intrinsic.
+// This is more complicated due to the optional use of TexFailCtrl, which means
+// the required return type is an aggregate.
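+// For example (illustrative): an image load with dmask = 0b0111 and TFE
+// enabled produces DMaskPop = 3 data dwords plus one TexFail dword; the data
+// part is rebuilt into the requested vector type (padded with undef if
+// needed) and the TexFail flag is extracted and merged in separately below.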
+static SDValue constructRetValue(SelectionDAG &DAG,
+ MachineSDNode *Result,
+ ArrayRef<EVT> ResultTypes,
+ bool IsTexFail, bool Unpacked, bool IsD16,
+ int DMaskPop, int NumVDataDwords,
+ const SDLoc &DL, LLVMContext &Context) {
+ // Determine the required return type. This is the same regardless of the IsTexFail flag.
+ EVT ReqRetVT = ResultTypes[0];
+ EVT ReqRetEltVT = ReqRetVT.isVector() ? ReqRetVT.getVectorElementType() : ReqRetVT;
+ int ReqRetNumElts = ReqRetVT.isVector() ? ReqRetVT.getVectorNumElements() : 1;
+ EVT AdjEltVT = Unpacked && IsD16 ? MVT::i32 : ReqRetEltVT;
+ EVT AdjVT = Unpacked ? ReqRetNumElts > 1 ? EVT::getVectorVT(Context, AdjEltVT, ReqRetNumElts)
+ : AdjEltVT
+ : ReqRetVT;
+
+ // Extract data part of the result
+ // Bitcast the result to the same type as the required return type
+ int NumElts;
+ if (IsD16 && !Unpacked)
+ NumElts = NumVDataDwords << 1;
+ else
+ NumElts = NumVDataDwords;
+
+ EVT CastVT = NumElts > 1 ? EVT::getVectorVT(Context, AdjEltVT, NumElts)
+ : AdjEltVT;
+
+ // Special case for v8f16. Rather than add support for this, use v4i32 to
+ // extract the data elements
+ bool V8F16Special = false;
+ if (CastVT == MVT::v8f16) {
+ CastVT = MVT::v4i32;
+ DMaskPop >>= 1;
+ ReqRetNumElts >>= 1;
+ V8F16Special = true;
+ AdjVT = MVT::v2i32;
+ }
+
+ SDValue N = SDValue(Result, 0);
+ SDValue CastRes = DAG.getNode(ISD::BITCAST, DL, CastVT, N);
+
+ // Iterate over the result
+ SmallVector<SDValue, 4> BVElts;
+
+ if (CastVT.isVector()) {
+ DAG.ExtractVectorElements(CastRes, BVElts, 0, DMaskPop);
+ } else {
+ BVElts.push_back(CastRes);
+ }
+ int ExtraElts = ReqRetNumElts - DMaskPop;
+ while (ExtraElts--)
+ BVElts.push_back(DAG.getUNDEF(AdjEltVT));
+
+ SDValue PreTFCRes;
+ if (ReqRetNumElts > 1) {
+ SDValue NewVec = DAG.getBuildVector(AdjVT, DL, BVElts);
+ if (IsD16 && Unpacked)
+ PreTFCRes = adjustLoadValueTypeImpl(NewVec, ReqRetVT, DL, DAG, Unpacked);
+ else
+ PreTFCRes = NewVec;
+ } else {
+ PreTFCRes = BVElts[0];
+ }
+
+ if (V8F16Special)
+ PreTFCRes = DAG.getNode(ISD::BITCAST, DL, MVT::v4f16, PreTFCRes);
+
+ if (!IsTexFail) {
+ if (Result->getNumValues() > 1)
+ return DAG.getMergeValues({PreTFCRes, SDValue(Result, 1)}, DL);
+ else
+ return PreTFCRes;
+ }
+
+ // Extract the TexFail result and insert into aggregate return
+ SmallVector<SDValue, 1> TFCElt;
+ DAG.ExtractVectorElements(N, TFCElt, DMaskPop, 1);
+ SDValue TFCRes = DAG.getNode(ISD::BITCAST, DL, ResultTypes[1], TFCElt[0]);
+ return DAG.getMergeValues({PreTFCRes, TFCRes, SDValue(Result, 1)}, DL);
+}
+
+static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
+ SDValue *LWE, bool &IsTexFail) {
+ auto TexFailCtrlConst = dyn_cast<ConstantSDNode>(TexFailCtrl.getNode());
+ if (!TexFailCtrlConst)
+ return false;
+
+ uint64_t Value = TexFailCtrlConst->getZExtValue();
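+ // TexFailCtrl is an immediate bitmask: bit 0 enables TFE and bit 1 enables
+ // LWE; e.g. (illustrative) a value of 3 requests both.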
+ if (Value) {
+ IsTexFail = true;
+ }
+
+ SDLoc DL(TexFailCtrlConst);
+ *TFE = DAG.getTargetConstant((Value & 0x1) ? 1 : 0, DL, MVT::i32);
+ Value &= ~(uint64_t)0x1;
+ *LWE = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
+ Value &= ~(uint64_t)0x2;
+
+ return Value == 0;
+}
+
SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
+ const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
@@ -4559,12 +4776,17 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;
- SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
+ SmallVector<EVT, 3> ResultTypes(Op->value_begin(), Op->value_end());
+ SmallVector<EVT, 3> OrigResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
+ bool IsA16 = false;
SDValue VData;
int NumVDataDwords;
+ bool AdjustRetType = false;
+
unsigned AddrIdx; // Index of first address argument
unsigned DMask;
+ unsigned DMaskLanes = 0;
if (BaseOpcode->Atomic) {
VData = Op.getOperand(2);
@@ -4587,7 +4809,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
AddrIdx = 3;
}
} else {
- unsigned DMaskIdx;
+ unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa<MemSDNode>(Op) ? 2 : 1;
+ auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
+ if (!DMaskConst)
+ return Op;
+ DMask = DMaskConst->getZExtValue();
+ DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
if (BaseOpcode->Store) {
VData = Op.getOperand(2);
@@ -4603,58 +4830,91 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32;
- DMaskIdx = 3;
} else {
- MVT LoadVT = Op.getSimpleValueType();
+ // Work out the number of dwords based on the dmask popcount, the underlying
+ // type, and whether packing is supported.
+ MVT LoadVT = ResultTypes[0].getSimpleVT();
if (LoadVT.getScalarType() == MVT::f16) {
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
!BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
- if (LoadVT.isVector() && Subtarget->hasUnpackedD16VMem())
- ResultTypes[0] = (LoadVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32;
}
- NumVDataDwords = (ResultTypes[0].getSizeInBits() + 31) / 32;
- DMaskIdx = isa<MemSDNode>(Op) ? 2 : 1;
- }
+ // Confirm that the return type is large enough for the dmask specified
+ if ((LoadVT.isVector() && LoadVT.getVectorNumElements() < DMaskLanes) ||
+ (!LoadVT.isVector() && DMaskLanes > 1))
+ return Op;
- auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
- if (!DMaskConst)
- return Op;
+ if (IsD16 && !Subtarget->hasUnpackedD16VMem())
+ NumVDataDwords = (DMaskLanes + 1) / 2;
+ else
+ NumVDataDwords = DMaskLanes;
- AddrIdx = DMaskIdx + 1;
- DMask = DMaskConst->getZExtValue();
- if (!DMask && !BaseOpcode->Store) {
- // Eliminate no-op loads. Stores with dmask == 0 are *not* no-op: they
- // store the channels' default values.
- SDValue Undef = DAG.getUNDEF(Op.getValueType());
- if (isa<MemSDNode>(Op))
- return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
- return Undef;
+ AdjustRetType = true;
}
+
+ AddrIdx = DMaskIdx + 1;
}
- unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
- (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
- (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
- (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
+ unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
+ unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
+ unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
+ NumCoords + NumLCM;
+ unsigned NumMIVAddrs = NumVAddrs;
+
SmallVector<SDValue, 4> VAddrs;
- for (unsigned i = 0; i < NumVAddrs; ++i)
- VAddrs.push_back(Op.getOperand(AddrIdx + i));
// Optimize _L to _LZ when _L is zero
if (LZMappingInfo) {
if (auto ConstantLod =
- dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
+ dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
- VAddrs.pop_back(); // remove 'lod'
+ NumMIVAddrs--; // remove 'lod'
}
}
}
+  // Check for 16-bit addresses and pack them if so.
+ unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
+ MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
+ const MVT VAddrScalarVT = VAddrVT.getScalarType();
+ if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16)) &&
+ ST->hasFeature(AMDGPU::FeatureR128A16)) {
+ IsA16 = true;
+ const MVT VectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
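+    // Pack pairs of 16-bit address components into one dword each; e.g. a 2D
+    // sample's (u, v) f16 coordinates become a single v2f16 value that is then
+    // bitcast to i32 for the vaddr list.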
+ for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
+ SDValue AddrLo, AddrHi;
+ // Push back extra arguments.
+ if (i < DimIdx) {
+ AddrLo = Op.getOperand(i);
+ } else {
+ AddrLo = Op.getOperand(i);
+ // Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
+ // in 1D, derivatives dx/dh and dx/dv are packed with undef.
+ if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
+ ((NumGradients / 2) % 2 == 1 &&
+ (i == DimIdx + (NumGradients / 2) - 1 ||
+ i == DimIdx + NumGradients - 1))) {
+ AddrHi = DAG.getUNDEF(MVT::f16);
+ } else {
+ AddrHi = Op.getOperand(i + 1);
+ i++;
+ }
+ AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VectorVT,
+ {AddrLo, AddrHi});
+ AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
+ }
+ VAddrs.push_back(AddrLo);
+ }
+ } else {
+ for (unsigned i = 0; i < NumMIVAddrs; ++i)
+ VAddrs.push_back(Op.getOperand(AddrIdx + i));
+ }
+
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
@@ -4674,11 +4934,53 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
CtrlIdx = AddrIdx + NumVAddrs + 3;
}
+ SDValue TFE;
+ SDValue LWE;
SDValue TexFail = Op.getOperand(CtrlIdx);
- auto TexFailConst = dyn_cast<ConstantSDNode>(TexFail.getNode());
- if (!TexFailConst || TexFailConst->getZExtValue() != 0)
+ bool IsTexFail = false;
+ if (!parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail))
return Op;
+ if (IsTexFail) {
+ if (!DMaskLanes) {
+      // Expecting to get an error flag since TFC is on and dmask is 0.
+      // Force dmask to be at least 1, otherwise the instruction will fail.
+ DMask = 0x1;
+ DMaskLanes = 1;
+ NumVDataDwords = 1;
+ }
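+    // TFE/LWE results are returned in an extra dword appended to the data,
+    // so reserve space for it here.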
+ NumVDataDwords += 1;
+ AdjustRetType = true;
+ }
+
+  // Something earlier tagged that the return type needs adjusting. This
+  // happens if the instruction is a load or has TexFailCtrl flags set.
+ if (AdjustRetType) {
+ // NumVDataDwords reflects the true number of dwords required in the return type
+ if (DMaskLanes == 0 && !BaseOpcode->Store) {
+ // This is a no-op load. This can be eliminated
+ SDValue Undef = DAG.getUNDEF(Op.getValueType());
+ if (isa<MemSDNode>(Op))
+ return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
+ return Undef;
+ }
+
+ // Have to use a power of 2 number of dwords
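+    // (e.g. with 32-bit data, a dmask of 0b0111 plus TFE needs 3 + 1 = 4
+    // dwords, already a power of two, while 0b1111 plus TFE needs 5 and is
+    // rounded up to 8.)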
+ NumVDataDwords = 1 << Log2_32_Ceil(NumVDataDwords);
+
+ EVT NewVT = NumVDataDwords > 1 ?
+ EVT::getVectorVT(*DAG.getContext(), MVT::f32, NumVDataDwords)
+ : MVT::f32;
+
+ ResultTypes[0] = NewVT;
+ if (ResultTypes.size() == 3) {
+      // The original result was an aggregate type used for the TexFailCtrl
+      // results. The actual instruction returns as a vector type, which has
+      // now been created; remove the aggregate result.
+ ResultTypes.erase(&ResultTypes[1]);
+ }
+ }
+
SDValue GLC;
SDValue SLC;
if (BaseOpcode->Atomic) {
@@ -4701,9 +5003,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Ops.push_back(Unorm);
Ops.push_back(GLC);
Ops.push_back(SLC);
- Ops.push_back(False); // r128
- Ops.push_back(False); // tfe
- Ops.push_back(False); // lwe
+ Ops.push_back(IsA16 && // a16 or r128
+ ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
+ Ops.push_back(TFE); // tfe
+ Ops.push_back(LWE); // lwe
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
Ops.push_back(IsD16 ? True : False);
@@ -4723,25 +5026,90 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
if (auto MemOp = dyn_cast<MemSDNode>(Op)) {
- MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
- *MemRefs = MemOp->getMemOperand();
- NewNode->setMemRefs(MemRefs, MemRefs + 1);
+ MachineMemOperand *MemRef = MemOp->getMemOperand();
+ DAG.setNodeMemRefs(NewNode, {MemRef});
}
if (BaseOpcode->AtomicX2) {
SmallVector<SDValue, 1> Elt;
DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
- } else if (IsD16 && !BaseOpcode->Store) {
- MVT LoadVT = Op.getSimpleValueType();
- SDValue Adjusted = adjustLoadValueTypeImpl(
- SDValue(NewNode, 0), LoadVT, DL, DAG, Subtarget->hasUnpackedD16VMem());
- return DAG.getMergeValues({Adjusted, SDValue(NewNode, 1)}, DL);
+ } else if (!BaseOpcode->Store) {
+ return constructRetValue(DAG, NewNode,
+ OrigResultTypes, IsTexFail,
+ Subtarget->hasUnpackedD16VMem(), IsD16,
+ DMaskLanes, NumVDataDwords, DL,
+ *DAG.getContext());
}
return SDValue(NewNode, 0);
}
+SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
+ SDValue Offset, SDValue GLC,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ VT.getStoreSize(), VT.getStoreSize());
+
+ if (!Offset->isDivergent()) {
+ SDValue Ops[] = {
+ Rsrc,
+ Offset, // Offset
+ GLC // glc
+ };
+ return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_LOAD, DL,
+ DAG.getVTList(VT), Ops, VT, MMO);
+ }
+
+ // We have a divergent offset. Emit a MUBUF buffer load instead. We can
+ // assume that the buffer is unswizzled.
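+  // Wide results (v8i32/v16i32) are split into several 16-byte (v4i32) loads
+  // here and concatenated back together below.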
+ SmallVector<SDValue, 4> Loads;
+ unsigned NumLoads = 1;
+ MVT LoadVT = VT.getSimpleVT();
+ unsigned NumElts = LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
+ assert((LoadVT.getScalarType() == MVT::i32 ||
+ LoadVT.getScalarType() == MVT::f32) &&
+ isPowerOf2_32(NumElts));
+
+ if (NumElts == 8 || NumElts == 16) {
+ NumLoads = NumElts == 16 ? 4 : 2;
+ LoadVT = MVT::v4i32;
+ }
+
+ SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
+ unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
+ SDValue Ops[] = {
+ DAG.getEntryNode(), // Chain
+ Rsrc, // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ {}, // voffset
+ {}, // soffset
+ {}, // offset
+ DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+
+ // Use the alignment to ensure that the required offsets will fit into the
+ // immediate offsets.
+ setBufferOffsets(Offset, DAG, &Ops[3], NumLoads > 1 ? 16 * NumLoads : 4);
+
+ uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
+ for (unsigned i = 0; i < NumLoads; ++i) {
+ Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
+ Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
+ Ops, LoadVT, MMO));
+ }
+
+ if (VT == MVT::v8i32 || VT == MVT::v16i32)
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Loads);
+
+ return Loads[0];
+}
+
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -4755,14 +5123,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
case Intrinsic::amdgcn_implicit_buffer_ptr: {
- if (getSubtarget()->isAmdCodeObjectV2(MF.getFunction()))
+ if (getSubtarget()->isAmdHsaOrMesa(MF.getFunction()))
return emitNonHSAIntrinsicError(DAG, DL, VT);
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
}
case Intrinsic::amdgcn_dispatch_ptr:
case Intrinsic::amdgcn_queue_ptr: {
- if (!Subtarget->isAmdCodeObjectV2(MF.getFunction())) {
+ if (!Subtarget->isAmdHsaOrMesa(MF.getFunction())) {
DiagnosticInfoUnsupported BadIntrin(
MF.getFunction(), "unsupported hsa intrinsic without hsa target",
DL.getDebugLoc());
@@ -4880,12 +5248,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::r600_read_tgid_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
- case Intrinsic::amdgcn_workitem_id_x: {
+ case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::r600_read_tidig_x:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDX);
- }
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
@@ -4896,19 +5263,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDZ);
- case AMDGPUIntrinsic::SI_load_const: {
- SDValue Ops[] = {
- Op.getOperand(1),
- Op.getOperand(2)
- };
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
- Op->getVTList(), Ops, VT, MMO);
+ case SIIntrinsic::SI_load_const: {
+ SDValue Load =
+ lowerSBuffer(MVT::i32, DL, Op.getOperand(1), Op.getOperand(2),
+ DAG.getTargetConstant(0, DL, MVT::i1), DAG);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Load);
+ }
+ case Intrinsic::amdgcn_s_buffer_load: {
+ unsigned Cache = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2),
+ DAG.getTargetConstant(Cache & 1, DL, MVT::i1), DAG);
}
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
@@ -4991,34 +5355,15 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Denominator, Numerator);
}
case Intrinsic::amdgcn_icmp: {
- const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- if (!CD)
- return DAG.getUNDEF(VT);
-
- int CondCode = CD->getSExtValue();
- if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
- CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
- return DAG.getUNDEF(VT);
-
- ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
- ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
- Op.getOperand(2), DAG.getCondCode(CCOpcode));
+ // There is a Pat that handles this variant, so return it as-is.
+ if (Op.getOperand(1).getValueType() == MVT::i1 &&
+ Op.getConstantOperandVal(2) == 0 &&
+ Op.getConstantOperandVal(3) == ICmpInst::Predicate::ICMP_NE)
+ return Op;
+ return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fcmp: {
- const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- if (!CD)
- return DAG.getUNDEF(VT);
-
- int CondCode = CD->getSExtValue();
- if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
- CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE)
- return DAG.getUNDEF(VT);
-
- FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
- ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
- Op.getOperand(2), DAG.getCondCode(CCOpcode));
+ return lowerFCMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fmed3:
return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
@@ -5058,6 +5403,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else
Opcode = AMDGPUISD::CVT_PK_U16_U32;
+ if (isTypeLegal(VT))
+ return DAG.getNode(Opcode, DL, VT, Op.getOperand(1), Op.getOperand(2));
+
SDValue Node = DAG.getNode(Opcode, DL, MVT::i32,
Op.getOperand(1), Op.getOperand(2));
return DAG.getNode(ISD::BITCAST, DL, VT, Node);
@@ -5127,36 +5475,104 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format: {
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ IdxEn = Idx->getZExtValue() != 0;
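+    // The legacy glc/slc flags are folded into a single cachepolicy operand,
+    // with glc in bit 0 and slc in bit 1.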
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
- Op.getOperand(4), // offset
- Op.getOperand(5), // glc
- Op.getOperand(6) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
+ setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+
+ EVT VT = Op.getValueType();
+ EVT IntVT = VT.changeTypeToInteger();
+ auto *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format: {
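+    // The raw forms carry no vindex operand, so lower with vindex = 0 and
+    // idxen = 0; the struct forms below pass the explicit vindex with
+    // idxen = 1.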
+ auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(4), // soffset
+ Offsets.second, // offset
+ Op.getOperand(5), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+
+ unsigned Opc = (IntrID == Intrinsic::amdgcn_raw_buffer_load) ?
+ AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
auto *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
- bool IsD16 = LoadVT.getScalarType() == MVT::f16;
- if (IsD16)
- return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, M, DAG);
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load_format: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+
+ unsigned Opc = (IntrID == Intrinsic::amdgcn_struct_buffer_load) ?
+ AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+
+ EVT VT = Op.getValueType();
+ EVT IntVT = VT.changeTypeToInteger();
+ auto *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
M->getMemOperand());
}
case Intrinsic::amdgcn_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
- bool IsD16 = LoadVT.getScalarType() == MVT::f16;
- if (IsD16) {
- return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16, M, DAG);
- }
+ unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
@@ -5164,12 +5580,62 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // voffset
Op.getOperand(5), // soffset
Op.getOperand(6), // offset
- Op.getOperand(7), // dfmt
- Op.getOperand(8), // nfmt
- Op.getOperand(9), // glc
- Op.getOperand(10) // slc
+ DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ };
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_raw_tbuffer_load: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+ auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
+
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(4), // soffset
+ Offsets.second, // offset
+ Op.getOperand(5), // format
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_tbuffer_load: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // format
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
};
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, LoadVT,
M->getMemOperand());
@@ -5184,14 +5650,22 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_buffer_atomic_xor: {
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
- Op.getOperand(5), // offset
- Op.getOperand(6) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
+ setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
@@ -5235,16 +5709,193 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
M->getMemOperand());
}
+ case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ case Intrinsic::amdgcn_raw_buffer_atomic_xor: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+ EVT VT = Op.getValueType();
+
+ auto *M = cast<MemSDNode>(Op);
+ unsigned Opcode = 0;
+
+ switch (IntrID) {
+ case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
+ break;
+ default:
+ llvm_unreachable("unhandled atomic opcode");
+ }
+
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ case Intrinsic::amdgcn_struct_buffer_atomic_xor: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+ EVT VT = Op.getValueType();
+
+ auto *M = cast<MemSDNode>(Op);
+ unsigned Opcode = 0;
+
+ switch (IntrID) {
+ case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
+ break;
+ default:
+ llvm_unreachable("unhandled atomic opcode");
+ }
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+ M->getMemOperand());
+ }
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(5)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
Op.getOperand(3), // cmp
Op.getOperand(4), // rsrc
Op.getOperand(5), // vindex
- Op.getOperand(6), // offset
- Op.getOperand(7) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ };
+ setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
+ EVT VT = Op.getValueType();
+ auto *M = cast<MemSDNode>(Op);
+
+ return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
+ Op->getVTList(), Ops, VT, M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // src
+ Op.getOperand(3), // cmp
+ Op.getOperand(4), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+ EVT VT = Op.getValueType();
+ auto *M = cast<MemSDNode>(Op);
+
+ return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
+ Op->getVTList(), Ops, VT, M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(6), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // src
+ Op.getOperand(3), // cmp
+ Op.getOperand(4), // rsrc
+ Op.getOperand(5), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(7), // soffset
+ Offsets.second, // offset
+ Op.getOperand(8), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
@@ -5360,19 +6011,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain,
Op.getOperand(2), Op.getOperand(3));
}
- case AMDGPUIntrinsic::AMDGPU_kill: {
- SDValue Src = Op.getOperand(2);
- if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) {
- if (!K->isNegative())
- return Chain;
-
- SDValue NegOne = DAG.getTargetConstant(FloatToBits(-1.0f), DL, MVT::i32);
- return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, NegOne);
- }
-
- SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
- return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
- }
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -5383,69 +6021,79 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
return SDValue();
};
- case AMDGPUIntrinsic::SI_tbuffer_store: {
-
- // Extract vindex and voffset from vaddr as appropriate
- const ConstantSDNode *OffEn = cast<ConstantSDNode>(Op.getOperand(10));
- const ConstantSDNode *IdxEn = cast<ConstantSDNode>(Op.getOperand(11));
- SDValue VAddr = Op.getOperand(5);
-
- SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
-
- assert(!(OffEn->isOne() && IdxEn->isOne()) &&
- "Legacy intrinsic doesn't support both offset and index - use new version");
-
- SDValue VIndex = IdxEn->isOne() ? VAddr : Zero;
- SDValue VOffset = OffEn->isOne() ? VAddr : Zero;
-
- // Deal with the vec-3 case
- const ConstantSDNode *NumChannels = cast<ConstantSDNode>(Op.getOperand(4));
- auto Opcode = NumChannels->getZExtValue() == 3 ?
- AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT;
-
+ case Intrinsic::amdgcn_tbuffer_store: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+ unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(11))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
- Chain,
- Op.getOperand(3), // vdata
- Op.getOperand(2), // rsrc
- VIndex,
- VOffset,
- Op.getOperand(6), // soffset
- Op.getOperand(7), // inst_offset
- Op.getOperand(8), // dfmt
- Op.getOperand(9), // nfmt
- Op.getOperand(12), // glc
- Op.getOperand(13), // slc
+ Chain,
+ VData, // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Op.getOperand(5), // voffset
+ Op.getOperand(6), // soffset
+ Op.getOperand(7), // offset
+ DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
-
- assert((cast<ConstantSDNode>(Op.getOperand(14)))->getZExtValue() == 0 &&
- "Value of tfe other than zero is unsupported");
-
- EVT VT = Op.getOperand(3).getValueType();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(Opcode, DL,
- Op->getVTList(), Ops, VT, MMO);
+ unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+ AMDGPUISD::TBUFFER_STORE_FORMAT;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
}
- case Intrinsic::amdgcn_tbuffer_store: {
+ case Intrinsic::amdgcn_struct_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Chain,
VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
- Op.getOperand(5), // voffset
+ Offsets.first, // voffset
Op.getOperand(6), // soffset
- Op.getOperand(7), // offset
- Op.getOperand(8), // dfmt
- Op.getOperand(9), // nfmt
- Op.getOperand(10), // glc
- Op.getOperand(11) // slc
+ Offsets.second, // offset
+ Op.getOperand(7), // format
+ Op.getOperand(8), // cachepolicy
+      DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+ unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+ AMDGPUISD::TBUFFER_STORE_FORMAT;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
+ case Intrinsic::amdgcn_raw_tbuffer_store: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData, // vdata
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // format
+ Op.getOperand(7), // cachepolicy
+      DAG.getConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -5460,15 +6108,23 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Chain,
- VData, // vdata
+ VData,
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
- Op.getOperand(5), // offset
- Op.getOperand(6), // glc
- Op.getOperand(7) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
+ setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
@@ -5476,6 +6132,59 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
+
+ case Intrinsic::amdgcn_raw_buffer_store:
+ case Intrinsic::amdgcn_raw_buffer_store_format: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData,
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+ unsigned Opc = IntrinsicID == Intrinsic::amdgcn_raw_buffer_store ?
+ AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
+ Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
+ case Intrinsic::amdgcn_struct_buffer_store:
+ case Intrinsic::amdgcn_struct_buffer_store_format: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData,
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+ unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
+ AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
+ Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -5486,6 +6195,94 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
}
+// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
+// offset (the offset that is included in bounds checking and swizzling, to be
+// split between the instruction's voffset and immoffset fields) and soffset
+// (the offset that is excluded from bounds checking and swizzling, to go in
+// the instruction's soffset field). This function takes the first kind of
+// offset and figures out how to split it between voffset and immoffset.
+std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
+ SDValue Offset, SelectionDAG &DAG) const {
+ SDLoc DL(Offset);
+ const unsigned MaxImm = 4095;
+ SDValue N0 = Offset;
+ ConstantSDNode *C1 = nullptr;
+
+ if ((C1 = dyn_cast<ConstantSDNode>(N0)))
+ N0 = SDValue();
+ else if (DAG.isBaseWithConstantOffset(N0)) {
+ C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ N0 = N0.getOperand(0);
+ }
+
+ if (C1) {
+ unsigned ImmOffset = C1->getZExtValue();
+    // If the immediate value is too big for the immoffset field, keep only its
+    // low 12 bits in the immoffset field so that the value that is copied/added
+    // for the voffset field is a multiple of 4096, and it stands a better chance
+ // of being CSEd with the copy/add for another similar load/store.
+ // However, do not do that rounding down to a multiple of 4096 if that is a
+ // negative number, as it appears to be illegal to have a negative offset
+ // in the vgpr, even if adding the immediate offset makes it positive.
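+    // For example, a combined offset of 8204 becomes a voffset add of 8192 plus
+    // an immoffset of 12, while a combined offset of 4095 fits entirely in the
+    // immoffset field.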
+ unsigned Overflow = ImmOffset & ~MaxImm;
+ ImmOffset -= Overflow;
+ if ((int32_t)Overflow < 0) {
+ Overflow += ImmOffset;
+ ImmOffset = 0;
+ }
+ C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+ if (Overflow) {
+ auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
+ if (!N0)
+ N0 = OverflowVal;
+ else {
+ SDValue Ops[] = { N0, OverflowVal };
+ N0 = DAG.getNode(ISD::ADD, DL, MVT::i32, Ops);
+ }
+ }
+ }
+ if (!N0)
+ N0 = DAG.getConstant(0, DL, MVT::i32);
+ if (!C1)
+ C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+ return {N0, SDValue(C1, 0)};
+}
+
+// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
+// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
+// pointed to by Offsets.
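+// A purely constant offset is split between soffset and instoffset when the
+// hardware encoding allows; a base-plus-constant keeps the base in voffset and
+// splits only the constant; anything else goes entirely into voffset.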
+void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
+ SelectionDAG &DAG, SDValue *Offsets,
+ unsigned Align) const {
+ SDLoc DL(CombinedOffset);
+ if (auto C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
+ uint32_t Imm = C->getZExtValue();
+ uint32_t SOffset, ImmOffset;
+ if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
+ Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
+ Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
+ Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+ return;
+ }
+ }
+ if (DAG.isBaseWithConstantOffset(CombinedOffset)) {
+ SDValue N0 = CombinedOffset.getOperand(0);
+ SDValue N1 = CombinedOffset.getOperand(1);
+ uint32_t SOffset, ImmOffset;
+ int Offset = cast<ConstantSDNode>(N1)->getSExtValue();
+ if (Offset >= 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
+ Subtarget, Align)) {
+ Offsets[0] = N0;
+ Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
+ Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+ return;
+ }
+ }
+ Offsets[0] = CombinedOffset;
+ Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
+ Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
+}
+
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG,
ISD::LoadExtType ExtType, SDValue Op,
const SDLoc &SL, EVT VT) {
@@ -5513,8 +6310,8 @@ SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const
// FIXME: Constant loads should all be marked invariant.
unsigned AS = Ld->getAddressSpace();
- if (AS != AMDGPUASI.CONSTANT_ADDRESS &&
- AS != AMDGPUASI.CONSTANT_ADDRESS_32BIT &&
+ if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
+ AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
(AS != AMDGPUAS::GLOBAL_ADDRESS || !Ld->isInvariant()))
return SDValue();
@@ -5625,15 +6422,15 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  // If there is a possibility that flat instructions access scratch memory
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUASI.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
- if (!Op->isDivergent() && Alignment >= 4)
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32)
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
@@ -5641,28 +6438,28 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
//
}
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
- AS == AMDGPUASI.GLOBAL_ADDRESS) {
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
- Alignment >= 4)
+ Alignment >= 4 && NumElements < 32)
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
//
}
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
- AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v4 loads are supported for private and global memory.
return SDValue();
}
- if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
// Depending on the setting of the private_element_size field in the
// resource descriptor, we can only make private accesses up to a certain
// size.
@@ -5681,7 +6478,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
// Use ds_read_b128 if possible.
if (Subtarget->useDS128() && Load->getAlignment() >= 16 &&
MemVT.getStoreSize() == 16)
@@ -5689,6 +6486,17 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
+
+    // SI has a hardware bug in the LDS / GDS bounds checking: if the base
+ // address is negative, then the instruction is incorrectly treated as
+ // out-of-bounds even if base + offsets is in bounds. Split vectorized
+ // loads here to avoid emitting ds_read2_b32. We may re-combine the
+ // load later in the SILoadStoreOptimizer.
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ NumElements == 2 && MemVT.getStoreSize() == 8 &&
+ Load->getAlignment() < 8) {
+ return SplitVectorLoad(Op, DAG);
+ }
}
return SDValue();
}
@@ -6058,17 +6866,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  // If there is a possibility that flat instructions access scratch memory
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUASI.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = VT.getVectorNumElements();
- if (AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorStore(Op, DAG);
return SDValue();
- } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
case 4:
return scalarizeVectorStore(Store, DAG);
@@ -6083,7 +6891,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
// Use ds_write_b128 if possible.
if (Subtarget->useDS128() && Store->getAlignment() >= 16 &&
VT.getStoreSize() == 16)
@@ -6091,6 +6899,18 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (NumElements > 2)
return SplitVectorStore(Op, DAG);
+
+    // SI has a hardware bug in the LDS / GDS bounds checking: if the base
+ // address is negative, then the instruction is incorrectly treated as
+ // out-of-bounds even if base + offsets is in bounds. Split vectorized
+ // stores here to avoid emitting ds_write2_b32. We may re-combine the
+ // store later in the SILoadStoreOptimizer.
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ NumElements == 2 && VT.getStoreSize() == 8 &&
+ Store->getAlignment() < 8) {
+ return SplitVectorStore(Op, DAG);
+ }
+
return SDValue();
} else {
llvm_unreachable("unhandled address space");
@@ -6101,17 +6921,24 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
+ SDValue TrigVal;
+
// TODO: Should this propagate fast-math-flags?
- SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
- DAG.getNode(ISD::FMUL, DL, VT, Arg,
- DAG.getConstantFP(0.5/M_PI, DL,
- VT)));
+
+ SDValue OneOver2Pi = DAG.getConstantFP(0.5 / M_PI, DL, VT);
+
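+  // Subtargets with a reduced trig input range need the scaled argument
+  // wrapped into [0, 1) with FRACT before the hardware SIN/COS; other
+  // subtargets can take the scaled value directly.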
+ if (Subtarget->hasTrigReducedRange()) {
+ SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
+ TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal);
+ } else {
+ TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
+ }
switch (Op.getOpcode()) {
case ISD::FCOS:
- return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
+ return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal);
case ISD::FSIN:
- return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
+ return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal);
default:
llvm_unreachable("Wrong trig opcode");
}
@@ -6123,7 +6950,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
- if (!isFlatGlobalAddrSpace(AS, AMDGPUASI))
+ if (!isFlatGlobalAddrSpace(AS))
return Op;
// Non-local address space requires custom lowering for atomic compare
@@ -6475,6 +7302,29 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
}
}
+ if (RHS.getOpcode() == ISD::SETCC && LHS.getOpcode() == AMDGPUISD::FP_CLASS)
+ std::swap(LHS, RHS);
+
+ if (LHS.getOpcode() == ISD::SETCC && RHS.getOpcode() == AMDGPUISD::FP_CLASS &&
+ RHS.hasOneUse()) {
+ ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ // and (fcmp seto), (fp_class x, mask) -> fp_class x, mask & ~(p_nan | n_nan)
+ // and (fcmp setuo), (fp_class x, mask) -> fp_class x, mask & (p_nan | n_nan)
+ const ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
+ if ((LCC == ISD::SETO || LCC == ISD::SETUO) && Mask &&
+ (RHS.getOperand(0) == LHS.getOperand(0) &&
+ LHS.getOperand(0) == LHS.getOperand(1))) {
+ const unsigned OrdMask = SIInstrFlags::S_NAN | SIInstrFlags::Q_NAN;
+ unsigned NewMask = LCC == ISD::SETO ?
+ Mask->getZExtValue() & ~OrdMask :
+ Mask->getZExtValue() & OrdMask;
+
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1, RHS.getOperand(0),
+ DAG.getConstant(NewMask, DL, MVT::i32));
+ }
+ }
+
if (VT == MVT::i32 &&
(RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) {
// and x, (sext cc from i1) => select cc, x, 0
@@ -6798,158 +7648,294 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
return AMDGPUTargetLowering::performRcpCombine(N, DCI);
}
-static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
- if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())
+bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
+ unsigned MaxDepth) const {
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::FCANONICALIZE)
return true;
- return DAG.isKnownNeverNaN(Op);
-}
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ auto F = CFP->getValueAPF();
+ if (F.isNaN() && F.isSignaling())
+ return false;
+ return !F.isDenormal() || denormalsEnabledForType(Op.getValueType());
+ }
-static bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
- const GCNSubtarget *ST, unsigned MaxDepth=5) {
// If source is a result of another standard FP operation it is already in
// canonical form.
+ if (MaxDepth == 0)
+ return false;
- switch (Op.getOpcode()) {
- default:
- break;
-
+ switch (Opcode) {
// These will flush denorms if required.
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
- case ISD::FSQRT:
case ISD::FCEIL:
case ISD::FFLOOR:
case ISD::FMA:
case ISD::FMAD:
-
- case ISD::FCANONICALIZE:
- return true;
-
+ case ISD::FSQRT:
+ case ISD::FDIV:
+ case ISD::FREM:
case ISD::FP_ROUND:
- return Op.getValueType().getScalarType() != MVT::f16 ||
- ST->hasFP16Denormals();
-
case ISD::FP_EXTEND:
- return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
- ST->hasFP16Denormals();
+ case AMDGPUISD::FMUL_LEGACY:
+ case AMDGPUISD::FMAD_FTZ:
+ case AMDGPUISD::RCP:
+ case AMDGPUISD::RSQ:
+ case AMDGPUISD::RSQ_CLAMP:
+ case AMDGPUISD::RCP_LEGACY:
+ case AMDGPUISD::RSQ_LEGACY:
+ case AMDGPUISD::RCP_IFLAG:
+ case AMDGPUISD::TRIG_PREOP:
+ case AMDGPUISD::DIV_SCALE:
+ case AMDGPUISD::DIV_FMAS:
+ case AMDGPUISD::DIV_FIXUP:
+ case AMDGPUISD::FRACT:
+ case AMDGPUISD::LDEXP:
+ case AMDGPUISD::CVT_PKRTZ_F16_F32:
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3:
+ return true;
// It can/will be lowered or combined as a bit operation.
// Need to check their input recursively to handle.
case ISD::FNEG:
case ISD::FABS:
- return (MaxDepth > 0) &&
- isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1);
+ case ISD::FCOPYSIGN:
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
case ISD::FSIN:
case ISD::FCOS:
case ISD::FSINCOS:
return Op.getValueType().getScalarType() != MVT::f16;
- // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms.
- // For such targets need to check their input recursively.
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
-
- if (ST->supportsMinMaxDenormModes() &&
- DAG.isKnownNeverNaN(Op.getOperand(0)) &&
- DAG.isKnownNeverNaN(Op.getOperand(1)))
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case AMDGPUISD::CLAMP:
+ case AMDGPUISD::FMED3:
+ case AMDGPUISD::FMAX3:
+ case AMDGPUISD::FMIN3: {
+    // FIXME: Shouldn't treat the generic operations differently based on these
+    // types. However, we aren't really required to flush the result from
+    // minnum/maxnum.
+
+ // snans will be quieted, so we only need to worry about denormals.
+ if (Subtarget->supportsMinMaxDenormModes() ||
+ denormalsEnabledForType(Op.getValueType()))
return true;
- return (MaxDepth > 0) &&
- isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1) &&
- isCanonicalized(DAG, Op.getOperand(1), ST, MaxDepth - 1);
+ // Flushing may be required.
+ // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms. For such
+ // targets need to check their input recursively.
+
+ // FIXME: Does this apply with clamp? It's implemented with max.
+ for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
+ if (!isCanonicalized(DAG, Op.getOperand(I), MaxDepth - 1))
+ return false;
+ }
+
+ return true;
+ }
+ case ISD::SELECT: {
+ return isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1) &&
+ isCanonicalized(DAG, Op.getOperand(2), MaxDepth - 1);
+ }
+ case ISD::BUILD_VECTOR: {
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ SDValue SrcOp = Op.getOperand(i);
+ if (!isCanonicalized(DAG, SrcOp, MaxDepth - 1))
+ return false;
+ }
- case ISD::ConstantFP: {
- auto F = cast<ConstantFPSDNode>(Op)->getValueAPF();
- return !F.isDenormal() && !(F.isNaN() && F.isSignaling());
+ return true;
}
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::EXTRACT_SUBVECTOR: {
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
}
- return false;
+ case ISD::INSERT_VECTOR_ELT: {
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1) &&
+ isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1);
+ }
+ case ISD::UNDEF:
+ // Could be anything.
+ return false;
+
+ case ISD::BITCAST: {
+    // Hack around the mess we make when legalizing extract_vector_elt
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::i16 &&
+ Src.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncSrc = Src.getOperand(0);
+ if (TruncSrc.getValueType() == MVT::i32 &&
+ TruncSrc.getOpcode() == ISD::BITCAST &&
+ TruncSrc.getOperand(0).getValueType() == MVT::v2f16) {
+ return isCanonicalized(DAG, TruncSrc.getOperand(0), MaxDepth - 1);
+ }
+ }
+
+ return false;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID
+ = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ // TODO: Handle more intrinsics
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_cvt_pkrtz:
+ case Intrinsic::amdgcn_cubeid:
+ case Intrinsic::amdgcn_frexp_mant:
+ case Intrinsic::amdgcn_fdot2:
+ return true;
+ default:
+ break;
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+ default:
+ return denormalsEnabledForType(Op.getValueType()) &&
+ DAG.isKnownNeverSNaN(Op);
+ }
+
+ llvm_unreachable("invalid operation");
}
// Constant fold canonicalize.
+SDValue SITargetLowering::getCanonicalConstantFP(
+ SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
+ // Flush denormals to 0 if not enabled.
+ if (C.isDenormal() && !denormalsEnabledForType(VT))
+ return DAG.getConstantFP(0.0, SL, VT);
+
+ if (C.isNaN()) {
+ APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
+ if (C.isSignaling()) {
+ // Quiet a signaling NaN.
+ // FIXME: Is this supposed to preserve payload bits?
+ return DAG.getConstantFP(CanonicalQNaN, SL, VT);
+ }
+
+ // Make sure it is the canonical NaN bitpattern.
+ //
+ // TODO: Can we use -1 as the canonical NaN value since it's an inline
+ // immediate?
+ if (C.bitcastToAPInt() != CanonicalQNaN.bitcastToAPInt())
+ return DAG.getConstantFP(CanonicalQNaN, SL, VT);
+ }
+
+ // Already canonical.
+ return DAG.getConstantFP(C, SL, VT);
+}
+
+static bool vectorEltWillFoldAway(SDValue Op) {
+ return Op.isUndef() || isa<ConstantFPSDNode>(Op);
+}
+
SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
// fcanonicalize undef -> qnan
if (N0.isUndef()) {
- EVT VT = N->getValueType(0);
APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
return DAG.getConstantFP(QNaN, SDLoc(N), VT);
}
- ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
- if (!CFP) {
- SDValue N0 = N->getOperand(0);
- EVT VT = N0.getValueType().getScalarType();
- auto ST = getSubtarget();
-
- if (((VT == MVT::f32 && ST->hasFP32Denormals()) ||
- (VT == MVT::f64 && ST->hasFP64Denormals()) ||
- (VT == MVT::f16 && ST->hasFP16Denormals())) &&
- DAG.isKnownNeverNaN(N0))
- return N0;
+ if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0)) {
+ EVT VT = N->getValueType(0);
+ return getCanonicalConstantFP(DAG, SDLoc(N), VT, CFP->getValueAPF());
+ }
- bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+ // fcanonicalize (build_vector x, k) -> build_vector (fcanonicalize x),
+ // (fcanonicalize k)
+ //
+ // fcanonicalize (build_vector x, undef) -> build_vector (fcanonicalize x), 0
- if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) &&
- isCanonicalized(DAG, N0, ST))
- return N0;
+ // TODO: This could be better with wider vectors that will be split to v2f16,
+ // and to consider uses since there aren't that many packed operations.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16 &&
+ isTypeLegal(MVT::v2f16)) {
+ SDLoc SL(N);
+ SDValue NewElts[2];
+ SDValue Lo = N0.getOperand(0);
+ SDValue Hi = N0.getOperand(1);
+ EVT EltVT = Lo.getValueType();
+
+ if (vectorEltWillFoldAway(Lo) || vectorEltWillFoldAway(Hi)) {
+ for (unsigned I = 0; I != 2; ++I) {
+ SDValue Op = N0.getOperand(I);
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ NewElts[I] = getCanonicalConstantFP(DAG, SL, EltVT,
+ CFP->getValueAPF());
+ } else if (Op.isUndef()) {
+ // Handled below based on what the other operand is.
+ NewElts[I] = Op;
+ } else {
+ NewElts[I] = DAG.getNode(ISD::FCANONICALIZE, SL, EltVT, Op);
+ }
+ }
- return SDValue();
- }
+      // If one half is undef, and one is constant, prefer a splat vector rather
+ // than the normal qNaN. If it's a register, prefer 0.0 since that's
+ // cheaper to use and may be free with a packed operation.
+      if (NewElts[0].isUndef()) {
+        NewElts[0] = isa<ConstantFPSDNode>(NewElts[1]) ?
+          NewElts[1] : DAG.getConstantFP(0.0f, SL, EltVT);
+      }
- const APFloat &C = CFP->getValueAPF();
+ if (NewElts[1].isUndef()) {
+ NewElts[1] = isa<ConstantFPSDNode>(NewElts[0]) ?
+ NewElts[0] : DAG.getConstantFP(0.0f, SL, EltVT);
+ }
- // Flush denormals to 0 if not enabled.
- if (C.isDenormal()) {
- EVT VT = N->getValueType(0);
- EVT SVT = VT.getScalarType();
- if (SVT == MVT::f32 && !Subtarget->hasFP32Denormals())
- return DAG.getConstantFP(0.0, SDLoc(N), VT);
+ return DAG.getBuildVector(VT, SL, NewElts);
+ }
+ }
- if (SVT == MVT::f64 && !Subtarget->hasFP64Denormals())
- return DAG.getConstantFP(0.0, SDLoc(N), VT);
+ unsigned SrcOpc = N0.getOpcode();
- if (SVT == MVT::f16 && !Subtarget->hasFP16Denormals())
- return DAG.getConstantFP(0.0, SDLoc(N), VT);
- }
+ // If it's free to do so, push canonicalizes further up the source, which may
+ // find a canonical source.
+ //
+ // TODO: More opcodes. Note this is unsafe for the _ieee minnum/maxnum for
+ // sNaNs.
+ if (SrcOpc == ISD::FMINNUM || SrcOpc == ISD::FMAXNUM) {
+ auto *CRHS = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ if (CRHS && N0.hasOneUse()) {
+ SDLoc SL(N);
+ SDValue Canon0 = DAG.getNode(ISD::FCANONICALIZE, SL, VT,
+ N0.getOperand(0));
+ SDValue Canon1 = getCanonicalConstantFP(DAG, SL, VT, CRHS->getValueAPF());
+ DCI.AddToWorklist(Canon0.getNode());
- if (C.isNaN()) {
- EVT VT = N->getValueType(0);
- APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
- if (C.isSignaling()) {
- // Quiet a signaling NaN.
- return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
+ return DAG.getNode(N0.getOpcode(), SL, VT, Canon0, Canon1);
}
-
- // Make sure it is the canonical NaN bitpattern.
- //
- // TODO: Can we use -1 as the canonical NaN value since it's an inline
- // immediate?
- if (C.bitcastToAPInt() != CanonicalQNaN.bitcastToAPInt())
- return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
}
- return N0;
+ return isCanonicalized(DAG, N0) ? N0 : SDValue();
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
switch (Opc) {
case ISD::FMAXNUM:
+ case ISD::FMAXNUM_IEEE:
return AMDGPUISD::FMAX3;
case ISD::SMAX:
return AMDGPUISD::SMAX3;
case ISD::UMAX:
return AMDGPUISD::UMAX3;
case ISD::FMINNUM:
+ case ISD::FMINNUM_IEEE:
return AMDGPUISD::FMIN3;
case ISD::SMIN:
return AMDGPUISD::SMIN3;
@@ -7044,11 +8030,18 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
// then give the other result, which is different from med3 with a NaN
// input.
SDValue Var = Op0.getOperand(0);
- if (!isKnownNeverSNan(DAG, Var))
+ if (!DAG.isKnownNeverSNaN(Var))
return SDValue();
- return DAG.getNode(AMDGPUISD::FMED3, SL, K0->getValueType(0),
- Var, SDValue(K0, 0), SDValue(K1, 0));
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
+ if ((!K0->hasOneUse() ||
+ TII->isInlineConstant(K0->getValueAPF().bitcastToAPInt())) &&
+ (!K1->hasOneUse() ||
+ TII->isInlineConstant(K1->getValueAPF().bitcastToAPInt()))) {
+ return DAG.getNode(AMDGPUISD::FMED3, SL, K0->getValueType(0),
+ Var, SDValue(K0, 0), SDValue(K1, 0));
+ }
}
return SDValue();
@@ -7109,6 +8102,7 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
// fminnum(fmaxnum(x, K0), K1), K0 < K1 && !is_snan(x) -> fmed3(x, K0, K1)
if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) ||
+ (Opc == ISD::FMINNUM_IEEE && Op0.getOpcode() == ISD::FMAXNUM_IEEE) ||
(Opc == AMDGPUISD::FMIN_LEGACY &&
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
(VT == MVT::f32 || VT == MVT::f64 ||
@@ -7216,9 +8210,11 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
switch(Opc) {
default:
- return SDValue();
+ break;
// TODO: Support other binary operations.
case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
case ISD::ADD:
case ISD::UMIN:
case ISD::UMAX:
@@ -7226,25 +8222,54 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
case ISD::SMAX:
case ISD::FMAXNUM:
case ISD::FMINNUM:
- return DAG.getNode(Opc, SL, EltVT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Vec.getOperand(0), Idx),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Vec.getOperand(1), Idx));
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE: {
+ SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec.getOperand(0), Idx);
+ SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec.getOperand(1), Idx);
+
+ DCI.AddToWorklist(Elt0.getNode());
+ DCI.AddToWorklist(Elt1.getNode());
+ return DAG.getNode(Opc, SL, EltVT, Elt0, Elt1, Vec->getFlags());
+ }
}
}
- if (!DCI.isBeforeLegalize())
- return SDValue();
-
unsigned VecSize = VecVT.getSizeInBits();
unsigned EltSize = EltVT.getSizeInBits();
+ // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
+ // This eliminates a non-constant index and the subsequent movrel or scratch
+ // access.
+ // Sub-dword vectors of 2 dwords or less have a better implementation.
+ // Vectors bigger than 8 dwords would yield too many v_cndmask_b32
+ // instructions.
+ if (VecSize <= 256 && (VecSize > 64 || EltSize >= 32) &&
+ !isa<ConstantSDNode>(N->getOperand(1))) {
+ SDLoc SL(N);
+ SDValue Idx = N->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
+ SDValue V;
+ for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
+ SDValue IC = DAG.getConstant(I, SL, IdxVT);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
+ if (I == 0)
+ V = Elt;
+ else
+ V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
+ }
+ return V;
+ }
+
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
// Try to turn sub-dword accesses of vectors into accesses of the same 32-bit
// elements. This exposes more load reduction opportunities by replacing
// multiple small extract_vector_elements with a single 32-bit extract.
auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (EltSize <= 16 &&
+ if (isa<MemSDNode>(Vec) &&
+ EltSize <= 16 &&
EltVT.isByteSized() &&
VecSize > 32 &&
VecSize % 32 == 0 &&
@@ -7274,46 +8299,40 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
return SDValue();
}
-static bool convertBuildVectorCastElt(SelectionDAG &DAG,
- SDValue &Lo, SDValue &Hi) {
- if (Hi.getOpcode() == ISD::BITCAST &&
- Hi.getOperand(0).getValueType() == MVT::f16 &&
- (isa<ConstantSDNode>(Lo) || Lo.isUndef())) {
- Lo = DAG.getNode(ISD::BITCAST, SDLoc(Lo), MVT::f16, Lo);
- Hi = Hi.getOperand(0);
- return true;
- }
-
- return false;
-}
-
-SDValue SITargetLowering::performBuildVectorCombine(
- SDNode *N, DAGCombinerInfo &DCI) const {
- SDLoc SL(N);
+SDValue
+SITargetLowering::performInsertVectorEltCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned VecSize = VecVT.getSizeInBits();
+ unsigned EltSize = EltVT.getSizeInBits();
- if (!isTypeLegal(MVT::v2i16))
+ // INSERT_VECTOR_ELT (<n x e>, var-idx)
+ // => BUILD_VECTOR n x select (e, const-idx)
+ // This eliminates a non-constant index and the subsequent movrel or scratch
+ // access.
+ // Sub-dword vectors of 2 dwords or less have a better implementation.
+ // Vectors bigger than 8 dwords would yield too many v_cndmask_b32
+ // instructions.
+ if (isa<ConstantSDNode>(Idx) ||
+ VecSize > 256 || (VecSize <= 64 && EltSize < 32))
return SDValue();
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
-
- if (VT == MVT::v2i16) {
- SDValue Lo = N->getOperand(0);
- SDValue Hi = N->getOperand(1);
- // v2i16 build_vector (const|undef), (bitcast f16:$x)
- // -> bitcast (v2f16 build_vector const|undef, $x
- if (convertBuildVectorCastElt(DAG, Lo, Hi)) {
- SDValue NewVec = DAG.getBuildVector(MVT::v2f16, SL, { Lo, Hi });
- return DAG.getNode(ISD::BITCAST, SL, VT, NewVec);
- }
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ SDValue Ins = N->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
- if (convertBuildVectorCastElt(DAG, Hi, Lo)) {
- SDValue NewVec = DAG.getBuildVector(MVT::v2f16, SL, { Hi, Lo });
- return DAG.getNode(ISD::BITCAST, SL, VT, NewVec);
- }
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
+ SDValue IC = DAG.getConstant(I, SL, IdxVT);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
+ SDValue V = DAG.getSelectCC(SL, Idx, IC, Ins, Elt, ISD::SETEQ);
+ Ops.push_back(V);
}
- return SDValue();
+ return DAG.getBuildVector(VecVT, SL, Ops);
}
unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
@@ -7568,7 +8587,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
EVT VT = N->getValueType(0);
SDLoc SL(N);
- if (!Subtarget->hasDLInsts() || VT != MVT::f32)
+ if (!Subtarget->hasDotInsts() || VT != MVT::f32)
return SDValue();
// FMA((F32)S0.x, (F32)S1. x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
@@ -7705,16 +8724,26 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
VT != MVT::f16))
return SDValue();
- // Match isinf pattern
+ // Match isinf/isfinite pattern
// (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
- if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
+ // (fcmp one (fabs x), inf) -> (fp_class x,
+ // (p_normal | n_normal | p_subnormal | n_subnormal | p_zero | n_zero))
+ if ((CC == ISD::SETOEQ || CC == ISD::SETONE) && LHS.getOpcode() == ISD::FABS) {
const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
if (!CRHS)
return SDValue();
const APFloat &APF = CRHS->getValueAPF();
if (APF.isInfinity() && !APF.isNegative()) {
- unsigned Mask = SIInstrFlags::P_INFINITY | SIInstrFlags::N_INFINITY;
+ const unsigned IsInfMask = SIInstrFlags::P_INFINITY |
+ SIInstrFlags::N_INFINITY;
+ const unsigned IsFiniteMask = SIInstrFlags::N_ZERO |
+ SIInstrFlags::P_ZERO |
+ SIInstrFlags::N_NORMAL |
+ SIInstrFlags::P_NORMAL |
+ SIInstrFlags::N_SUBNORMAL |
+ SIInstrFlags::P_SUBNORMAL;
+ unsigned Mask = CC == ISD::SETOEQ ? IsInfMask : IsFiniteMask;
return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(Mask, SL, MVT::i32));
}
@@ -7759,8 +8788,7 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) ||
- TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
+ if (TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
}
@@ -7792,6 +8820,9 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return SDValue();
+
switch (N->getOpcode()) {
default:
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
@@ -7810,17 +8841,15 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM:
case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE:
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN:
case AMDGPUISD::FMIN_LEGACY:
- case AMDGPUISD::FMAX_LEGACY: {
- if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
- getTargetMachine().getOptLevel() > CodeGenOpt::None)
- return performMinMaxCombine(N, DCI);
- break;
- }
+ case AMDGPUISD::FMAX_LEGACY:
+ return performMinMaxCombine(N, DCI);
case ISD::FMA:
return performFMACombine(N, DCI);
case ISD::LOAD: {
@@ -7912,8 +8941,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DCI);
- case ISD::BUILD_VECTOR:
- return performBuildVectorCombine(N, DCI);
+ case ISD::INSERT_VECTOR_ELT:
+ return performInsertVectorEltCombine(N, DCI);
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
@@ -7926,6 +8955,7 @@ static unsigned SubIdx2Lane(unsigned Idx) {
case AMDGPU::sub1: return 1;
case AMDGPU::sub2: return 2;
case AMDGPU::sub3: return 3;
+ case AMDGPU::sub4: return 4; // Possible with TFE/LWE
}
}
@@ -7939,11 +8969,16 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
if (D16Idx >= 0 && Node->getConstantOperandVal(D16Idx))
return Node; // not implemented for D16
- SDNode *Users[4] = { nullptr };
+ SDNode *Users[5] = { nullptr };
unsigned Lane = 0;
unsigned DmaskIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) - 1;
unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
unsigned NewDmask = 0;
+ unsigned TFEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::tfe) - 1;
+ unsigned LWEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::lwe) - 1;
+ bool UsesTFC = Node->getConstantOperandVal(TFEIdx) ||
+ Node->getConstantOperandVal(LWEIdx);
+ unsigned TFCLane = 0;
bool HasChain = Node->getNumValues() > 1;
if (OldDmask == 0) {
@@ -7951,6 +8986,12 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
return Node;
}
+ unsigned OldBitsSet = countPopulation(OldDmask);
+ // Work out which is the TFE/LWE lane if that is enabled.
+ if (UsesTFC) {
+ TFCLane = OldBitsSet;
+ }
+
// Try to figure out the used register components
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
I != E; ++I) {
@@ -7970,28 +9011,49 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// set, etc.
Lane = SubIdx2Lane(I->getConstantOperandVal(1));
- // Set which texture component corresponds to the lane.
- unsigned Comp;
- for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
- Comp = countTrailingZeros(Dmask);
- Dmask &= ~(1 << Comp);
- }
+ // Check if the use is for the TFE/LWE generated result at VGPRn+1.
+ if (UsesTFC && Lane == TFCLane) {
+ Users[Lane] = *I;
+ } else {
+ // Set which texture component corresponds to the lane.
+ unsigned Comp;
+ for (unsigned i = 0, Dmask = OldDmask; (i <= Lane) && (Dmask != 0); i++) {
+ Comp = countTrailingZeros(Dmask);
+ Dmask &= ~(1 << Comp);
+ }
- // Abort if we have more than one user per component
- if (Users[Lane])
- return Node;
+ // Abort if we have more than one user per component.
+ if (Users[Lane])
+ return Node;
- Users[Lane] = *I;
- NewDmask |= 1 << Comp;
+ Users[Lane] = *I;
+ NewDmask |= 1 << Comp;
+ }
}
+ // Don't allow 0 dmask, as hardware assumes one channel enabled.
+ bool NoChannels = !NewDmask;
+ if (NoChannels) {
+ // If the original dmask has one channel - then nothing to do
+ if (OldBitsSet == 1)
+ return Node;
+ // Use an arbitrary dmask - required for the instruction to work
+ NewDmask = 1;
+ }
// Abort if there's no change
if (NewDmask == OldDmask)
return Node;
unsigned BitsSet = countPopulation(NewDmask);
- int NewOpcode = AMDGPU::getMaskedMIMGOp(Node->getMachineOpcode(), BitsSet);
+ // Check for TFE or LWE - increase the number of channels by one to account
+ // for the extra return value
+ // This will need adjustment for D16 if this is also included in
+ // adjustWritemask (this function), but at present D16 is excluded.
+ unsigned NewChannels = BitsSet + UsesTFC;
+
+ int NewOpcode =
+ AMDGPU::getMaskedMIMGOp(Node->getMachineOpcode(), NewChannels);
assert(NewOpcode != -1 &&
NewOpcode != static_cast<int>(Node->getMachineOpcode()) &&
"failed to find equivalent MIMG op");
@@ -8004,8 +9066,9 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
MVT SVT = Node->getValueType(0).getVectorElementType().getSimpleVT();
- MVT ResultVT = BitsSet == 1 ?
- SVT : MVT::getVectorVT(SVT, BitsSet == 3 ? 4 : BitsSet);
+ MVT ResultVT = NewChannels == 1 ?
+ SVT : MVT::getVectorVT(SVT, NewChannels == 3 ? 4 :
+ NewChannels == 5 ? 8 : NewChannels);
SDVTList NewVTList = HasChain ?
DAG.getVTList(ResultVT, MVT::Other) : DAG.getVTList(ResultVT);
@@ -8015,11 +9078,11 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
if (HasChain) {
// Update chain.
- NewNode->setMemRefs(Node->memoperands_begin(), Node->memoperands_end());
+ DAG.setNodeMemRefs(NewNode, Node->memoperands());
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), SDValue(NewNode, 1));
}
- if (BitsSet == 1) {
+ if (NewChannels == 1) {
assert(Node->hasNUsesOfValue(1, 0));
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY,
SDLoc(Node), Users[Lane]->getValueType(0),
@@ -8029,19 +9092,24 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
}
// Update the users of the node with the new indices
- for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
+ for (unsigned i = 0, Idx = AMDGPU::sub0; i < 5; ++i) {
SDNode *User = Users[i];
- if (!User)
- continue;
-
- SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
- DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
+ if (!User) {
+ // Handle the special case of NoChannels. We set NewDmask to 1 above, but
+ // Users[0] is still nullptr because channel 0 doesn't really have a use.
+ if (i || !NoChannels)
+ continue;
+ } else {
+ SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
+ DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
+ }
switch (Idx) {
default: break;
case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
+ case AMDGPU::sub3: Idx = AMDGPU::sub4; break;
}
}
@@ -8457,49 +9525,56 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits);
}
+LLVM_ATTRIBUTE_UNUSED
+static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
+ assert(N->getOpcode() == ISD::CopyFromReg);
+ do {
+ // Follow the chain until we find an INLINEASM node.
+ N = N->getOperand(0).getNode();
+ if (N->getOpcode() == ISD::INLINEASM)
+ return true;
+ } while (N->getOpcode() == ISD::CopyFromReg);
+ return false;
+}
+
bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
- FunctionLoweringInfo * FLI, DivergenceAnalysis * DA) const
+ FunctionLoweringInfo * FLI, LegacyDivergenceAnalysis * KDA) const
{
switch (N->getOpcode()) {
- case ISD::Register:
case ISD::CopyFromReg:
{
- const RegisterSDNode *R = nullptr;
- if (N->getOpcode() == ISD::Register) {
- R = dyn_cast<RegisterSDNode>(N);
- }
- else {
- R = dyn_cast<RegisterSDNode>(N->getOperand(1));
- }
- if (R)
- {
- const MachineFunction * MF = FLI->MF;
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
- unsigned Reg = R->getReg();
- if (TRI.isPhysicalRegister(Reg))
- return TRI.isVGPR(MRI, Reg);
-
- if (MRI.isLiveIn(Reg)) {
- // workitem.id.x workitem.id.y workitem.id.z
- // Any VGPR formal argument is also considered divergent
- if (TRI.isVGPR(MRI, Reg))
- return true;
- // Formal arguments of non-entry functions
- // are conservatively considered divergent
- else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
- return true;
- }
- return !DA || DA->isDivergent(FLI->getValueFromVirtualReg(Reg));
+ const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
+ const MachineFunction * MF = FLI->MF;
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+ unsigned Reg = R->getReg();
+ if (TRI.isPhysicalRegister(Reg))
+ return !TRI.isSGPRReg(MRI, Reg);
+
+ if (MRI.isLiveIn(Reg)) {
+ // workitem.id.x workitem.id.y workitem.id.z
+ // Any VGPR formal argument is also considered divergent
+ if (!TRI.isSGPRReg(MRI, Reg))
+ return true;
+ // Formal arguments of non-entry functions
+ // are conservatively considered divergent
+ else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
+ return true;
+ return false;
}
+ const Value *V = FLI->getValueFromVirtualReg(Reg);
+ if (V)
+ return KDA->isDivergent(V);
+ assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
+ return !TRI.isSGPRReg(MRI, Reg);
}
break;
case ISD::LOAD: {
- const LoadSDNode *L = dyn_cast<LoadSDNode>(N);
- if (L->getMemOperand()->getAddrSpace() ==
- Subtarget->getAMDGPUAS().PRIVATE_ADDRESS)
- return true;
+ const LoadSDNode *L = cast<LoadSDNode>(N);
+ unsigned AS = L->getAddressSpace();
+ // A flat load may access private memory.
+ return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
} break;
case ISD::CALLSEQ_END:
return true;
@@ -8522,3 +9597,30 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
}
return false;
}
+
+bool SITargetLowering::denormalsEnabledForType(EVT VT) const {
+ switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ return Subtarget->hasFP32Denormals();
+ case MVT::f64:
+ return Subtarget->hasFP64Denormals();
+ case MVT::f16:
+ return Subtarget->hasFP16Denormals();
+ default:
+ return false;
+ }
+}
+
+bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ if (Op.getOpcode() == AMDGPUISD::CLAMP) {
+ if (Subtarget->enableDX10Clamp())
+ return true; // Clamped to 0.
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+
+ return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG,
+ SNaN, Depth);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 5b3d49b3d8e3..bcef519ee663 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -60,11 +60,22 @@ private:
MVT VT, unsigned Offset) const;
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const;
+ SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
+ SDValue GLC, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ // The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
+ // (the offset that is included in bounds checking and swizzling, to be split
+ // between the instruction's voffset and immoffset fields) and soffset (the
+ // offset that is excluded from bounds checking and swizzling, to go in the
+ // instruction's soffset field). This function takes the first kind of
+ // offset and figures out how to split it between voffset and immoffset.
+ std::pair<SDValue, SDValue> splitBufferOffsets(SDValue Offset,
+ SelectionDAG &DAG) const;
+
SDValue widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -81,7 +92,7 @@ private:
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
- SelectionDAG &DAG,
+ SelectionDAG &DAG, ArrayRef<SDValue> Ops,
bool IsIntrinsic = false) const;
SDValue handleD16VData(SDValue VData, SelectionDAG &DAG) const;
@@ -99,6 +110,7 @@ private:
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const;
@@ -130,6 +142,8 @@ private:
SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT,
+ const APFloat &C) const;
SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
@@ -140,7 +154,7 @@ private:
SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performBuildVectorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
unsigned getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0, const SDNode *N1) const;
@@ -156,7 +170,6 @@ private:
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
- bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
unsigned isCFIntrinsic(const SDNode *Intr) const;
@@ -175,6 +188,12 @@ private:
/// global value \p GV, false otherwise.
bool shouldEmitPCReloc(const GlobalValue *GV) const;
+ // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
+ // three offsets (voffset, soffset and instoffset) into the SDValue[3] array
+ // pointed to by Offsets.
+ void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
+ SDValue *Offsets, unsigned Align = 4) const;
+
public:
SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);
@@ -192,6 +211,7 @@ public:
SmallVectorImpl<Value*> &/*Ops*/,
Type *&/*AccessTy*/) const override;
+ bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I = nullptr) const override;
@@ -215,7 +235,7 @@ public:
bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const override;
+ getPreferredVectorAction(MVT VT) const override;
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
@@ -248,11 +268,11 @@ public:
void passSpecialInputs(
CallLoweringInfo &CLI,
+ CCState &CCInfo,
const SIMachineFunctionInfo &Info,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains,
- SDValue Chain,
- SDValue StackPtr) const;
+ SDValue Chain) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -322,7 +342,16 @@ public:
unsigned Depth = 0) const override;
bool isSDNodeSourceOfDivergence(const SDNode *N,
- FunctionLoweringInfo *FLI, DivergenceAnalysis *DA) const override;
+ FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;
+
+ bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
+ unsigned MaxDepth = 5) const;
+ bool denormalsEnabledForType(EVT VT) const;
+
+ bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN = false,
+ unsigned Depth = 0) const override;
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index dc9397cf7b85..ba21a5ce1293 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -66,6 +66,8 @@ private:
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
+ bool optimizeVccBranch(MachineInstr &MI) const;
+
public:
static char ID;
@@ -320,6 +322,96 @@ bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
return true;
}
+bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
+ // Match:
+ // sreg = -1
+ // vcc = S_AND_B64 exec, sreg
+ // S_CBRANCH_VCC[N]Z
+ // =>
+ // S_CBRANCH_EXEC[N]Z
+ bool Changed = false;
+ MachineBasicBlock &MBB = *MI.getParent();
+ const unsigned CondReg = AMDGPU::VCC;
+ const unsigned ExecReg = AMDGPU::EXEC;
+ const unsigned And = AMDGPU::S_AND_B64;
+
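+ // Scan backwards from the branch (at most a few instructions) for the
+ // S_AND_B64 that defines vcc, giving up if exec or vcc is clobbered first.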
+ MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
+ E = MBB.rend();
+ bool ReadsCond = false;
+ unsigned Threshold = 5;
+ for (++A ; A != E ; ++A) {
+ if (!--Threshold)
+ return false;
+ if (A->modifiesRegister(ExecReg, TRI))
+ return false;
+ if (A->modifiesRegister(CondReg, TRI)) {
+ if (!A->definesRegister(CondReg, TRI) || A->getOpcode() != And)
+ return false;
+ break;
+ }
+ ReadsCond |= A->readsRegister(CondReg, TRI);
+ }
+ if (A == E)
+ return false;
+
+ MachineOperand &Op1 = A->getOperand(1);
+ MachineOperand &Op2 = A->getOperand(2);
+ if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {
+ TII->commuteInstruction(*A);
+ Changed = true;
+ }
+ if (Op1.getReg() != ExecReg)
+ return Changed;
+ if (Op2.isImm() && Op2.getImm() != -1)
+ return Changed;
+
+ unsigned SReg = AMDGPU::NoRegister;
+ if (Op2.isReg()) {
+ SReg = Op2.getReg();
+ auto M = std::next(A);
+ bool ReadsSreg = false;
+ for ( ; M != E ; ++M) {
+ if (M->definesRegister(SReg, TRI))
+ break;
+ if (M->modifiesRegister(SReg, TRI))
+ return Changed;
+ ReadsSreg |= M->readsRegister(SReg, TRI);
+ }
+ if (M == E ||
+ !M->isMoveImmediate() ||
+ !M->getOperand(1).isImm() ||
+ M->getOperand(1).getImm() != -1)
+ return Changed;
+ // First, if sreg is only used by the and instruction, fold the immediate
+ // into that and.
+ if (!ReadsSreg && Op2.isKill()) {
+ A->getOperand(2).ChangeToImmediate(-1);
+ M->eraseFromParent();
+ }
+ }
+
+ if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
+ MI.killsRegister(CondReg, TRI))
+ A->eraseFromParent();
+
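+ // At this point vcc is known to equal exec, so the branch can test exec
+ // directly (or be dropped in the degenerate vccz case below).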
+ bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
+ if (SReg == ExecReg) {
+ if (IsVCCZ) {
+ MI.eraseFromParent();
+ return true;
+ }
+ MI.setDesc(TII->get(AMDGPU::S_BRANCH));
+ } else {
+ MI.setDesc(TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ
+ : AMDGPU::S_CBRANCH_EXECNZ));
+ }
+
+ MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
+ MI.addImplicitDefUseOperands(*MBB.getParent());
+
+ return true;
+}
+
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -384,7 +476,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
kill(MI);
if (ExecBranchStack.empty()) {
- if (skipIfDead(MI, *NextBB)) {
+ if (NextBB != BE && skipIfDead(MI, *NextBB)) {
HaveSkipBlock = true;
NextBB = std::next(BI);
BE = MF.end();
@@ -417,6 +509,11 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
}
break;
+ case AMDGPU::S_CBRANCH_VCCZ:
+ case AMDGPU::S_CBRANCH_VCCNZ:
+ MadeChange |= optimizeVccBranch(MI);
+ break;
+
default:
break;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index d456e3d9b94d..afc0b4467610 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -13,6 +13,14 @@
/// Memory reads and writes are issued asynchronously, so we need to insert
/// S_WAITCNT instructions when we want to access any of their results or
/// overwrite any register that's used asynchronously.
+///
+/// TODO: This pass currently keeps one timeline per hardware counter. A more
+/// finely-grained approach that keeps one timeline per event type could
+/// sometimes get away with generating weaker s_waitcnt instructions. For
+/// example, when both SMEM and LDS are in flight and we need to wait for
+/// the i-th-last LDS instruction, then an lgkmcnt(i) is actually sufficient,
+/// but the pass will currently generate a conservative lgkmcnt(0) because
+/// multiple event types are in flight.
//
//===----------------------------------------------------------------------===//
@@ -33,7 +41,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -69,6 +76,25 @@ static cl::opt<unsigned> ForceEmitZeroFlag(
namespace {
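+// Small forward iterator over consecutive enum values, used to iterate over
+// all InstCounterTypes with a range-based for loop (see inst_counter_types()).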
+template <typename EnumT>
+class enum_iterator
+ : public iterator_facade_base<enum_iterator<EnumT>,
+ std::forward_iterator_tag, const EnumT> {
+ EnumT Value;
+public:
+ enum_iterator() = default;
+ enum_iterator(EnumT Value) : Value(Value) {}
+
+ enum_iterator &operator++() {
+ Value = static_cast<EnumT>(Value + 1);
+ return *this;
+ }
+
+ bool operator==(const enum_iterator &RHS) const { return Value == RHS.Value; }
+
+ EnumT operator*() const { return Value; }
+};
+
// Class of object that encapsulates latest instruction counter score
// associated with the operand. Used for determining whether
// s_waitcnt instruction needs to be emitted.
@@ -77,12 +103,17 @@ namespace {
enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, NUM_INST_CNTS };
+iterator_range<enum_iterator<InstCounterType>> inst_counter_types() {
+ return make_range(enum_iterator<InstCounterType>(VM_CNT),
+ enum_iterator<InstCounterType>(NUM_INST_CNTS));
+}
+
using RegInterval = std::pair<signed, signed>;
struct {
- int32_t VmcntMax;
- int32_t ExpcntMax;
- int32_t LgkmcntMax;
+ uint32_t VmcntMax;
+ uint32_t ExpcntMax;
+ uint32_t LgkmcntMax;
int32_t NumVGPRsMax;
int32_t NumSGPRsMax;
} HardwareLimits;
@@ -108,6 +139,14 @@ enum WaitEventType {
NUM_WAIT_EVENTS,
};
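+// For each hardware counter (indexed by InstCounterType: VM_CNT, LGKM_CNT,
+// EXP_CNT), the bitmask of WaitEventTypes that are tracked by that counter.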
+static const uint32_t WaitEventMaskForInst[NUM_INST_CNTS] = {
+ (1 << VMEM_ACCESS),
+ (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) |
+ (1 << SQ_MESSAGE),
+ (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
+ (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS),
+};
+
// The mapping is:
// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
// SQ_MAX_PGM_VGPRS .. NUM_ALL_VGPRS-1 extra VGPR-like slots
@@ -122,30 +161,38 @@ enum RegisterMapping {
NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts.
};
-#define ForAllWaitEventType(w) \
- for (enum WaitEventType w = (enum WaitEventType)0; \
- (w) < (enum WaitEventType)NUM_WAIT_EVENTS; \
- (w) = (enum WaitEventType)((w) + 1))
+void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
+ switch (T) {
+ case VM_CNT:
+ Wait.VmCnt = std::min(Wait.VmCnt, Count);
+ break;
+ case EXP_CNT:
+ Wait.ExpCnt = std::min(Wait.ExpCnt, Count);
+ break;
+ case LGKM_CNT:
+ Wait.LgkmCnt = std::min(Wait.LgkmCnt, Count);
+ break;
+ default:
+ llvm_unreachable("bad InstCounterType");
+ }
+}
-// This is a per-basic-block object that maintains current score brackets
-// of each wait counter, and a per-register scoreboard for each wait counter.
+// This object maintains the current score brackets of each wait counter, and
+// a per-register scoreboard for each wait counter.
+//
// We also maintain the latest score for every event type that can change the
// waitcnt in order to know if there are multiple types of events within
// the brackets. When multiple types of event happen in the bracket,
// wait count may get decreased out of order, therefore we need to put in
// "s_waitcnt 0" before use.
-class BlockWaitcntBrackets {
+class WaitcntBrackets {
public:
- BlockWaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
+ WaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {
+ for (auto T : inst_counter_types())
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
- }
}
- ~BlockWaitcntBrackets() = default;
-
- static int32_t getWaitCountMax(InstCounterType T) {
+ static uint32_t getWaitCountMax(InstCounterType T) {
switch (T) {
case VM_CNT:
return HardwareLimits.VmcntMax;
@@ -159,33 +206,14 @@ public:
return 0;
}
- void setScoreLB(InstCounterType T, int32_t Val) {
- assert(T < NUM_INST_CNTS);
- if (T >= NUM_INST_CNTS)
- return;
- ScoreLBs[T] = Val;
- }
-
- void setScoreUB(InstCounterType T, int32_t Val) {
- assert(T < NUM_INST_CNTS);
- if (T >= NUM_INST_CNTS)
- return;
- ScoreUBs[T] = Val;
- if (T == EXP_CNT) {
- int32_t UB = (int)(ScoreUBs[T] - getWaitCountMax(EXP_CNT));
- if (ScoreLBs[T] < UB)
- ScoreLBs[T] = UB;
- }
- }
-
- int32_t getScoreLB(InstCounterType T) {
+ uint32_t getScoreLB(InstCounterType T) const {
assert(T < NUM_INST_CNTS);
if (T >= NUM_INST_CNTS)
return 0;
return ScoreLBs[T];
}
- int32_t getScoreUB(InstCounterType T) {
+ uint32_t getScoreUB(InstCounterType T) const {
assert(T < NUM_INST_CNTS);
if (T >= NUM_INST_CNTS)
return 0;
@@ -194,89 +222,56 @@ public:
// Mapping from event to counter.
InstCounterType eventCounter(WaitEventType E) {
- switch (E) {
- case VMEM_ACCESS:
+ if (E == VMEM_ACCESS)
return VM_CNT;
- case LDS_ACCESS:
- case GDS_ACCESS:
- case SQ_MESSAGE:
- case SMEM_ACCESS:
+ if (WaitEventMaskForInst[LGKM_CNT] & (1 << E))
return LGKM_CNT;
- case EXP_GPR_LOCK:
- case GDS_GPR_LOCK:
- case VMW_GPR_LOCK:
- case EXP_POS_ACCESS:
- case EXP_PARAM_ACCESS:
- return EXP_CNT;
- default:
- llvm_unreachable("unhandled event type");
- }
- return NUM_INST_CNTS;
+ assert(WaitEventMaskForInst[EXP_CNT] & (1 << E));
+ return EXP_CNT;
}
- void setRegScore(int GprNo, InstCounterType T, int32_t Val) {
- if (GprNo < NUM_ALL_VGPRS) {
- if (GprNo > VgprUB) {
- VgprUB = GprNo;
- }
- VgprScores[T][GprNo] = Val;
- } else {
- assert(T == LGKM_CNT);
- if (GprNo - NUM_ALL_VGPRS > SgprUB) {
- SgprUB = GprNo - NUM_ALL_VGPRS;
- }
- SgprScores[GprNo - NUM_ALL_VGPRS] = Val;
- }
- }
-
- int32_t getRegScore(int GprNo, InstCounterType T) {
+ uint32_t getRegScore(int GprNo, InstCounterType T) {
if (GprNo < NUM_ALL_VGPRS) {
return VgprScores[T][GprNo];
}
+ assert(T == LGKM_CNT);
return SgprScores[GprNo - NUM_ALL_VGPRS];
}
void clear() {
memset(ScoreLBs, 0, sizeof(ScoreLBs));
memset(ScoreUBs, 0, sizeof(ScoreUBs));
- memset(EventUBs, 0, sizeof(EventUBs));
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
+ PendingEvents = 0;
+ memset(MixedPendingEvents, 0, sizeof(MixedPendingEvents));
+ for (auto T : inst_counter_types())
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
- }
memset(SgprScores, 0, sizeof(SgprScores));
}
+ bool merge(const WaitcntBrackets &Other);
+
RegInterval getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII,
const MachineRegisterInfo *MRI,
const SIRegisterInfo *TRI, unsigned OpNo,
bool Def) const;
- void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
- const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
- unsigned OpNo, int32_t Val);
-
- void setWaitAtBeginning() { WaitAtBeginning = true; }
- void clearWaitAtBeginning() { WaitAtBeginning = false; }
- bool getWaitAtBeginning() const { return WaitAtBeginning; }
- void setEventUB(enum WaitEventType W, int32_t Val) { EventUBs[W] = Val; }
int32_t getMaxVGPR() const { return VgprUB; }
int32_t getMaxSGPR() const { return SgprUB; }
- int32_t getEventUB(enum WaitEventType W) const {
- assert(W < NUM_WAIT_EVENTS);
- return EventUBs[W];
- }
-
- bool counterOutOfOrder(InstCounterType T);
- unsigned int updateByWait(InstCounterType T, int ScoreToWait);
+ bool counterOutOfOrder(InstCounterType T) const;
+ bool simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
+ bool simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
+ void determineWait(InstCounterType T, uint32_t ScoreToWait,
+ AMDGPU::Waitcnt &Wait) const;
+ void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
+ void applyWaitcnt(InstCounterType T, unsigned Count);
void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
const MachineRegisterInfo *MRI, WaitEventType E,
MachineInstr &MI);
- bool hasPendingSMEM() const {
- return (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]);
+ bool hasPending() const { return PendingEvents != 0; }
+ bool hasPendingEvent(WaitEventType E) const {
+ return PendingEvents & (1 << E);
}
bool hasPendingFlat() const {
@@ -291,75 +286,71 @@ public:
LastFlat[LGKM_CNT] = ScoreUBs[LGKM_CNT];
}
- int pendingFlat(InstCounterType Ct) const { return LastFlat[Ct]; }
-
- void setLastFlat(InstCounterType Ct, int Val) { LastFlat[Ct] = Val; }
-
- bool getRevisitLoop() const { return RevisitLoop; }
- void setRevisitLoop(bool RevisitLoopIn) { RevisitLoop = RevisitLoopIn; }
+ void print(raw_ostream &);
+ void dump() { print(dbgs()); }
- void setPostOrder(int32_t PostOrderIn) { PostOrder = PostOrderIn; }
- int32_t getPostOrder() const { return PostOrder; }
+private:
+ struct MergeInfo {
+ uint32_t OldLB;
+ uint32_t OtherLB;
+ uint32_t MyShift;
+ uint32_t OtherShift;
+ };
+ static bool mergeScore(const MergeInfo &M, uint32_t &Score,
+ uint32_t OtherScore);
+
+ void setScoreLB(InstCounterType T, uint32_t Val) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return;
+ ScoreLBs[T] = Val;
+ }
- void setWaitcnt(MachineInstr *WaitcntIn) { Waitcnt = WaitcntIn; }
- void clearWaitcnt() { Waitcnt = nullptr; }
- MachineInstr *getWaitcnt() const { return Waitcnt; }
+ void setScoreUB(InstCounterType T, uint32_t Val) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return;
+ ScoreUBs[T] = Val;
+ if (T == EXP_CNT) {
+ uint32_t UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT);
+ if (ScoreLBs[T] < UB && UB < ScoreUBs[T])
+ ScoreLBs[T] = UB;
+ }
+ }
- bool mixedExpTypes() const { return MixedExpTypes; }
- void setMixedExpTypes(bool MixedExpTypesIn) {
- MixedExpTypes = MixedExpTypesIn;
+ void setRegScore(int GprNo, InstCounterType T, uint32_t Val) {
+ if (GprNo < NUM_ALL_VGPRS) {
+ if (GprNo > VgprUB) {
+ VgprUB = GprNo;
+ }
+ VgprScores[T][GprNo] = Val;
+ } else {
+ assert(T == LGKM_CNT);
+ if (GprNo - NUM_ALL_VGPRS > SgprUB) {
+ SgprUB = GprNo - NUM_ALL_VGPRS;
+ }
+ SgprScores[GprNo - NUM_ALL_VGPRS] = Val;
+ }
}
- void print(raw_ostream &);
- void dump() { print(dbgs()); }
+ void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
+ unsigned OpNo, uint32_t Val);
-private:
const GCNSubtarget *ST = nullptr;
- bool WaitAtBeginning = false;
- bool RevisitLoop = false;
- bool MixedExpTypes = false;
- int32_t PostOrder = 0;
- MachineInstr *Waitcnt = nullptr;
- int32_t ScoreLBs[NUM_INST_CNTS] = {0};
- int32_t ScoreUBs[NUM_INST_CNTS] = {0};
- int32_t EventUBs[NUM_WAIT_EVENTS] = {0};
+ uint32_t ScoreLBs[NUM_INST_CNTS] = {0};
+ uint32_t ScoreUBs[NUM_INST_CNTS] = {0};
+ uint32_t PendingEvents = 0;
+ bool MixedPendingEvents[NUM_INST_CNTS] = {false};
// Remember the last flat memory operation.
- int32_t LastFlat[NUM_INST_CNTS] = {0};
+ uint32_t LastFlat[NUM_INST_CNTS] = {0};
// wait_cnt scores for every vgpr.
// Keep track of the VgprUB and SgprUB to make merge at join efficient.
int32_t VgprUB = 0;
int32_t SgprUB = 0;
- int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
+ uint32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
// Wait cnt scores for every sgpr, only lgkmcnt is relevant.
- int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
-};
-
-// This is a per-loop-region object that records waitcnt status at the end of
-// loop footer from the previous iteration. We also maintain an iteration
-// count to track the number of times the loop has been visited. When it
-// doesn't converge naturally, we force convergence by inserting s_waitcnt 0
-// at the end of the loop footer.
-class LoopWaitcntData {
-public:
- LoopWaitcntData() = default;
- ~LoopWaitcntData() = default;
-
- void incIterCnt() { IterCnt++; }
- void resetIterCnt() { IterCnt = 0; }
- unsigned getIterCnt() { return IterCnt; }
-
- void setWaitcnt(MachineInstr *WaitcntIn) { LfWaitcnt = WaitcntIn; }
- MachineInstr *getWaitcnt() const { return LfWaitcnt; }
-
- void print() { LLVM_DEBUG(dbgs() << " iteration " << IterCnt << '\n';); }
-
-private:
- // s_waitcnt added at the end of loop footer to stablize wait scores
- // at the end of the loop footer.
- MachineInstr *LfWaitcnt = nullptr;
- // Number of iterations the loop has been visited, not including the initial
- // walk over.
- int32_t IterCnt = 0;
+ uint32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
};
class SIInsertWaitcnts : public MachineFunctionPass {
@@ -368,22 +359,21 @@ private:
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
- const MachineLoopInfo *MLI = nullptr;
- AMDGPU::IsaInfo::IsaVersion IV;
- AMDGPUAS AMDGPUASI;
+ AMDGPU::IsaVersion IV;
- DenseSet<MachineBasicBlock *> BlockVisitedSet;
DenseSet<MachineInstr *> TrackedWaitcntSet;
DenseSet<MachineInstr *> VCCZBugHandledSet;
- DenseMap<MachineBasicBlock *, std::unique_ptr<BlockWaitcntBrackets>>
- BlockWaitcntBracketsMap;
-
- std::vector<MachineBasicBlock *> BlockWaitcntProcessedSet;
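+ // Per-block state for the walk over the function in reverse post-order;
+ // Dirty marks blocks whose incoming waitcnt state may still change and
+ // which therefore need (re)processing.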
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ std::unique_ptr<WaitcntBrackets> Incoming;
+ bool Dirty = true;
- DenseMap<MachineLoop *, std::unique_ptr<LoopWaitcntData>> LoopWaitcntDataMap;
+ explicit BlockInfo(MachineBasicBlock *MBB) : MBB(MBB) {}
+ };
- std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
+ std::vector<BlockInfo> BlockInfos; // by reverse post-order traversal index
+ DenseMap<MachineBasicBlock *, unsigned> RpotIdxMap;
// ForceEmitZeroWaitcnts: force all waitcnts insts to be s_waitcnt 0
// because of amdgpu-waitcnt-forcezero flag
@@ -407,20 +397,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- void addKillWaitBracket(BlockWaitcntBrackets *Bracket) {
- // The waitcnt information is copied because it changes as the block is
- // traversed.
- KillWaitBrackets.push_back(
- llvm::make_unique<BlockWaitcntBrackets>(*Bracket));
- }
-
bool isForceEmitWaitcnt() const {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1))
+ for (auto T : inst_counter_types())
if (ForceEmitWaitcnt[T])
return true;
return false;
@@ -454,27 +435,22 @@ public:
}
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
- void generateWaitcntInstBefore(MachineInstr &MI,
- BlockWaitcntBrackets *ScoreBrackets);
+ bool generateWaitcntInstBefore(MachineInstr &MI,
+ WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr);
void updateEventWaitcntAfter(MachineInstr &Inst,
- BlockWaitcntBrackets *ScoreBrackets);
- void mergeInputScoreBrackets(MachineBasicBlock &Block);
- bool isLoopBottom(const MachineLoop *Loop, const MachineBasicBlock *Block);
- unsigned countNumBottomBlocks(const MachineLoop *Loop);
- void insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block);
- void insertWaitcntBeforeCF(MachineBasicBlock &Block, MachineInstr *Inst);
- bool isWaitcntStronger(unsigned LHS, unsigned RHS);
- unsigned combineWaitcnt(unsigned LHS, unsigned RHS);
+ WaitcntBrackets *ScoreBrackets);
+ bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets);
};
} // end anonymous namespace
-RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI,
- const SIInstrInfo *TII,
- const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI,
- unsigned OpNo,
- bool Def) const {
+RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
+ const SIInstrInfo *TII,
+ const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI,
+ unsigned OpNo, bool Def) const {
const MachineOperand &Op = MI->getOperand(OpNo);
if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()) ||
(Def && !Op.isDef()))
@@ -512,11 +488,11 @@ RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI,
return Result;
}
-void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI,
- const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI,
- unsigned OpNo, int32_t Val) {
+void WaitcntBrackets::setExpScore(const MachineInstr *MI,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI, unsigned OpNo,
+ uint32_t Val) {
RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false);
LLVM_DEBUG({
const MachineOperand &Opnd = MI->getOperand(OpNo);
@@ -527,26 +503,26 @@ void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI,
}
}
-void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI,
- WaitEventType E, MachineInstr &Inst) {
+void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI,
+ WaitEventType E, MachineInstr &Inst) {
const MachineRegisterInfo &MRIA = *MRI;
InstCounterType T = eventCounter(E);
- int32_t CurrScore = getScoreUB(T) + 1;
- // EventUB and ScoreUB need to be update regardless if this event changes
- // the score of a register or not.
+ uint32_t CurrScore = getScoreUB(T) + 1;
+ if (CurrScore == 0)
+ report_fatal_error("InsertWaitcnt score wraparound");
+ // PendingEvents and ScoreUB need to be updated regardless of whether this
+ // event changes the score of a register or not.
// Examples including vm_cnt when buffer-store or lgkm_cnt when send-message.
- EventUBs[E] = CurrScore;
+ if (!hasPendingEvent(E)) {
+ if (PendingEvents & WaitEventMaskForInst[T])
+ MixedPendingEvents[T] = true;
+ PendingEvents |= 1 << E;
+ }
setScoreUB(T, CurrScore);
if (T == EXP_CNT) {
- // Check for mixed export types. If they are mixed, then a waitcnt exp(0)
- // is required.
- if (!MixedExpTypes) {
- MixedExpTypes = counterOutOfOrder(EXP_CNT);
- }
-
// Put score on the source vgprs. If this is a store, just use those
// specific register(s).
if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
@@ -671,12 +647,11 @@ void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
}
}
-void BlockWaitcntBrackets::print(raw_ostream &OS) {
+void WaitcntBrackets::print(raw_ostream &OS) {
OS << '\n';
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int LB = getScoreLB(T);
- int UB = getScoreUB(T);
+ for (auto T : inst_counter_types()) {
+ uint32_t LB = getScoreLB(T);
+ uint32_t UB = getScoreUB(T);
switch (T) {
case VM_CNT:
@@ -696,10 +671,10 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
if (LB < UB) {
// Print vgpr scores.
for (int J = 0; J <= getMaxVGPR(); J++) {
- int RegScore = getRegScore(J, T);
+ uint32_t RegScore = getRegScore(J, T);
if (RegScore <= LB)
continue;
- int RelScore = RegScore - LB - 1;
+ uint32_t RelScore = RegScore - LB - 1;
if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) {
OS << RelScore << ":v" << J << " ";
} else {
@@ -709,10 +684,10 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
// Also need to print sgpr scores for lgkm_cnt.
if (T == LGKM_CNT) {
for (int J = 0; J <= getMaxSGPR(); J++) {
- int RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ uint32_t RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
if (RegScore <= LB)
continue;
- int RelScore = RegScore - LB - 1;
+ uint32_t RelScore = RegScore - LB - 1;
OS << RelScore << ":s" << J << " ";
}
}
@@ -722,23 +697,31 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
OS << '\n';
}
-unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T,
- int ScoreToWait) {
- unsigned int NeedWait = 0;
- if (ScoreToWait == -1) {
- // The score to wait is unknown. This implies that it was not encountered
- // during the path of the CFG walk done during the current traversal but
- // may be seen on a different path. Emit an s_wait counter with a
- // conservative value of 0 for the counter.
- NeedWait = CNT_MASK(T);
- setScoreLB(T, getScoreUB(T));
- return NeedWait;
- }
+/// Simplify the waitcnt, in the sense of removing redundant counts, and return
+/// whether a waitcnt instruction is needed at all.
+bool WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const {
+ return simplifyWaitcnt(VM_CNT, Wait.VmCnt) |
+ simplifyWaitcnt(EXP_CNT, Wait.ExpCnt) |
+ simplifyWaitcnt(LGKM_CNT, Wait.LgkmCnt);
+}
+bool WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
+ unsigned &Count) const {
+ const uint32_t LB = getScoreLB(T);
+ const uint32_t UB = getScoreUB(T);
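+ // The count is only useful if waiting on it would retire operations that
+ // are still outstanding, i.e. raise the lower bound of the bracket;
+ // otherwise drop this component by setting it to "no wait".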
+ if (Count < UB && UB - Count > LB)
+ return true;
+
+ Count = ~0u;
+ return false;
+}
+
+void WaitcntBrackets::determineWait(InstCounterType T, uint32_t ScoreToWait,
+ AMDGPU::Waitcnt &Wait) const {
// If the score of src_operand falls within the bracket, we need an
// s_waitcnt instruction.
- const int32_t LB = getScoreLB(T);
- const int32_t UB = getScoreUB(T);
+ const uint32_t LB = getScoreLB(T);
+ const uint32_t UB = getScoreUB(T);
if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
if ((T == VM_CNT || T == LGKM_CNT) &&
hasPendingFlat() &&
@@ -746,90 +729,46 @@ unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T,
// If there is a pending FLAT operation, and this is a VMem or LGKM
// waitcnt and the target can report early completion, then we need
// to force a waitcnt 0.
- NeedWait = CNT_MASK(T);
- setScoreLB(T, getScoreUB(T));
+ addWait(Wait, T, 0);
} else if (counterOutOfOrder(T)) {
// Counter can get decremented out-of-order when there
// are multiple types event in the bracket. Also emit an s_wait counter
// with a conservative value of 0 for the counter.
- NeedWait = CNT_MASK(T);
- setScoreLB(T, getScoreUB(T));
+ addWait(Wait, T, 0);
} else {
- NeedWait = CNT_MASK(T);
- setScoreLB(T, ScoreToWait);
+ addWait(Wait, T, UB - ScoreToWait);
}
}
+}
- return NeedWait;
+void WaitcntBrackets::applyWaitcnt(const AMDGPU::Waitcnt &Wait) {
+ applyWaitcnt(VM_CNT, Wait.VmCnt);
+ applyWaitcnt(EXP_CNT, Wait.ExpCnt);
+ applyWaitcnt(LGKM_CNT, Wait.LgkmCnt);
}
-// Where there are multiple types of event in the bracket of a counter,
-// the decrement may go out of order.
-bool BlockWaitcntBrackets::counterOutOfOrder(InstCounterType T) {
- switch (T) {
- case VM_CNT:
- return false;
- case LGKM_CNT: {
- if (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]) {
- // Scalar memory read always can go out of order.
- return true;
- }
- int NumEventTypes = 0;
- if (EventUBs[LDS_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[LDS_ACCESS] <= ScoreUBs[LGKM_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[GDS_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[GDS_ACCESS] <= ScoreUBs[LGKM_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[SQ_MESSAGE] > ScoreLBs[LGKM_CNT] &&
- EventUBs[SQ_MESSAGE] <= ScoreUBs[LGKM_CNT]) {
- NumEventTypes++;
- }
- if (NumEventTypes <= 1) {
- return false;
- }
- break;
+void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
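+ // Account for an executed wait: a full wait (Count == 0) clears every
+ // pending event for this counter, while a partial wait can only raise the
+ // lower bound when the counter is known to decrement in order.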
+ const uint32_t UB = getScoreUB(T);
+ if (Count >= UB)
+ return;
+ if (Count != 0) {
+ if (counterOutOfOrder(T))
+ return;
+ setScoreLB(T, std::max(getScoreLB(T), UB - Count));
+ } else {
+ setScoreLB(T, UB);
+ MixedPendingEvents[T] = false;
+ PendingEvents &= ~WaitEventMaskForInst[T];
}
- case EXP_CNT: {
- // If there has been a mixture of export types, then a waitcnt exp(0) is
- // required.
- if (MixedExpTypes)
- return true;
- int NumEventTypes = 0;
- if (EventUBs[EXP_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
- EventUBs[EXP_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[GDS_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
- EventUBs[GDS_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[VMW_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
- EventUBs[VMW_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[EXP_PARAM_ACCESS] > ScoreLBs[EXP_CNT] &&
- EventUBs[EXP_PARAM_ACCESS] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
-
- if (EventUBs[EXP_POS_ACCESS] > ScoreLBs[EXP_CNT] &&
- EventUBs[EXP_POS_ACCESS] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
+}
- if (NumEventTypes <= 1) {
- return false;
- }
- break;
- }
- default:
- break;
- }
- return true;
+// Where there are multiple types of event in the bracket of a counter,
+// the decrement may go out of order.
+bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
+ // Scalar memory reads can always go out of order.
+ if (T == LGKM_CNT && hasPendingEvent(SMEM_ACCESS))
+ return true;
+ return MixedPendingEvents[T];
}
INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
@@ -851,29 +790,6 @@ static bool readsVCCZ(const MachineInstr &MI) {
!MI.getOperand(1).isUndef();
}
-/// Given wait count encodings checks if LHS is stronger than RHS.
-bool SIInsertWaitcnts::isWaitcntStronger(unsigned LHS, unsigned RHS) {
- if (AMDGPU::decodeVmcnt(IV, LHS) > AMDGPU::decodeVmcnt(IV, RHS))
- return false;
- if (AMDGPU::decodeLgkmcnt(IV, LHS) > AMDGPU::decodeLgkmcnt(IV, RHS))
- return false;
- if (AMDGPU::decodeExpcnt(IV, LHS) > AMDGPU::decodeExpcnt(IV, RHS))
- return false;
- return true;
-}
-
-/// Given wait count encodings create a new encoding which is stronger
-/// or equal to both.
-unsigned SIInsertWaitcnts::combineWaitcnt(unsigned LHS, unsigned RHS) {
- unsigned VmCnt = std::min(AMDGPU::decodeVmcnt(IV, LHS),
- AMDGPU::decodeVmcnt(IV, RHS));
- unsigned LgkmCnt = std::min(AMDGPU::decodeLgkmcnt(IV, LHS),
- AMDGPU::decodeLgkmcnt(IV, RHS));
- unsigned ExpCnt = std::min(AMDGPU::decodeExpcnt(IV, LHS),
- AMDGPU::decodeExpcnt(IV, RHS));
- return AMDGPU::encodeWaitcnt(IV, VmCnt, ExpCnt, LgkmCnt);
-}
-
/// Generate s_waitcnt instruction to be placed before cur_Inst.
/// Instructions of a given type are returned in order,
/// but instructions of different types can complete out of order.
@@ -884,51 +800,23 @@ unsigned SIInsertWaitcnts::combineWaitcnt(unsigned LHS, unsigned RHS) {
/// and if so what the value of each counter is.
/// The "score bracket" is bound by the lower bound and upper bound
/// scores (*_score_LB and *_score_ub respectively).
-void SIInsertWaitcnts::generateWaitcntInstBefore(
- MachineInstr &MI, BlockWaitcntBrackets *ScoreBrackets) {
- // To emit, or not to emit - that's the question!
- // Start with an assumption that there is no need to emit.
- unsigned int EmitWaitcnt = 0;
-
- // No need to wait before phi. If a phi-move exists, then the wait should
- // has been inserted before the move. If a phi-move does not exist, then
- // wait should be inserted before the real use. The same is true for
- // sc-merge. It is not a coincident that all these cases correspond to the
- // instructions that are skipped in the assembling loop.
- bool NeedLineMapping = false; // TODO: Check on this.
-
- // ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug
- bool ForceEmitZeroWaitcnt = false;
-
+bool SIInsertWaitcnts::generateWaitcntInstBefore(
+ MachineInstr &MI, WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr) {
setForceEmitWaitcnt();
bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
- if (MI.isDebugInstr() &&
- // TODO: any other opcode?
- !NeedLineMapping) {
- return;
- }
+ if (MI.isDebugInstr())
+ return false;
- // See if an s_waitcnt is forced at block entry, or is needed at
- // program end.
- if (ScoreBrackets->getWaitAtBeginning()) {
- // Note that we have already cleared the state, so we don't need to update
- // it.
- ScoreBrackets->clearWaitAtBeginning();
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- EmitWaitcnt |= CNT_MASK(T);
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- }
- }
+ AMDGPU::Waitcnt Wait;
// See if this instruction has a forced S_WAITCNT VM.
// TODO: Handle other cases of NeedsWaitcntVmBefore()
- else if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
- MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
- MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) {
- EmitWaitcnt |=
- ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) {
+ Wait.VmCnt = 0;
}
// All waits must be resolved at call return.
@@ -936,23 +824,14 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- EmitWaitcnt |= CNT_MASK(T);
- }
- }
+ Wait = AMDGPU::Waitcnt::allZero();
}
// Resolve vm waits before gs-done.
else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
MI.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_) ==
AMDGPU::SendMsg::ID_GS_DONE)) {
- if (ScoreBrackets->getScoreUB(VM_CNT) > ScoreBrackets->getScoreLB(VM_CNT)) {
- ScoreBrackets->setScoreLB(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
- EmitWaitcnt |= CNT_MASK(VM_CNT);
- }
+ Wait.VmCnt = 0;
}
#if 0 // TODO: the following blocks of logic when we have fence.
else if (MI.getOpcode() == SC_FENCE) {
@@ -1016,14 +895,12 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
// Export and GDS are tracked individually, either may trigger a waitcnt
// for EXEC.
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(EXP_GPR_LOCK));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(EXP_PARAM_ACCESS));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(EXP_POS_ACCESS));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(GDS_GPR_LOCK));
+ if (ScoreBrackets.hasPendingEvent(EXP_GPR_LOCK) ||
+ ScoreBrackets.hasPendingEvent(EXP_PARAM_ACCESS) ||
+ ScoreBrackets.hasPendingEvent(EXP_POS_ACCESS) ||
+ ScoreBrackets.hasPendingEvent(GDS_GPR_LOCK)) {
+ Wait.ExpCnt = 0;
+ }
}
#if 0 // TODO: the following code to handle CALL.
@@ -1051,27 +928,27 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// instruction.
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
- if (AS != AMDGPUASI.LOCAL_ADDRESS)
+ if (AS != AMDGPUAS::LOCAL_ADDRESS)
continue;
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
// VM_CNT is only relevant to vgpr or LDS.
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
}
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &Op = MI.getOperand(I);
const MachineRegisterInfo &MRIA = *MRI;
RegInterval Interval =
- ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, false);
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, false);
for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
if (TRI->isVGPR(MRIA, Op.getReg())) {
// VM_CNT is only relevant to vgpr or LDS.
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
}
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT));
+ ScoreBrackets.determineWait(
+ LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
}
}
// End of for loop that looks at all source operands to decide vm_wait_cnt
@@ -1086,29 +963,29 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// FIXME: Should not be relying on memoperands.
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
- if (AS != AMDGPUASI.LOCAL_ADDRESS)
+ if (AS != AMDGPUAS::LOCAL_ADDRESS)
continue;
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ ScoreBrackets.determineWait(
+ EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
}
}
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
MachineOperand &Def = MI.getOperand(I);
const MachineRegisterInfo &MRIA = *MRI;
RegInterval Interval =
- ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, true);
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, true);
for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
if (TRI->isVGPR(MRIA, Def.getReg())) {
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ ScoreBrackets.determineWait(
+ EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
}
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT));
+ ScoreBrackets.determineWait(
+ LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
}
} // End of for loop that looks at all dest operands.
}
@@ -1119,182 +996,79 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// requiring a WAITCNT beforehand.
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
!ST->hasAutoWaitcntBeforeBarrier()) {
- EmitWaitcnt |=
- ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- LGKM_CNT, ScoreBrackets->getScoreUB(LGKM_CNT));
+ Wait = AMDGPU::Waitcnt::allZero();
}
// TODO: Remove this work-around, enable the assert for Bug 457939
// after fixing the scheduler. Also, the Shader Compiler code is
// independent of target.
if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
- if (ScoreBrackets->getScoreLB(LGKM_CNT) <
- ScoreBrackets->getScoreUB(LGKM_CNT) &&
- ScoreBrackets->hasPendingSMEM()) {
- // Wait on everything, not just LGKM. vccz reads usually come from
- // terminators, and we always wait on everything at the end of the
- // block, so if we only wait on LGKM here, we might end up with
- // another s_waitcnt inserted right after this if there are non-LGKM
- // instructions still outstanding.
- // FIXME: this is too conservative / the comment is wrong.
- // We don't wait on everything at the end of the block and we combine
- // waitcnts so we should never have back-to-back waitcnts.
- ForceEmitZeroWaitcnt = true;
- EmitWaitcnt = true;
+ if (ScoreBrackets.getScoreLB(LGKM_CNT) <
+ ScoreBrackets.getScoreUB(LGKM_CNT) &&
+ ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
+ Wait.LgkmCnt = 0;
}
}
- // Does this operand processing indicate s_wait counter update?
- if (EmitWaitcnt || IsForceEmitWaitcnt) {
- int CntVal[NUM_INST_CNTS];
-
- bool UseDefaultWaitcntStrategy = true;
- if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) {
- // Force all waitcnts to 0.
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- }
- CntVal[VM_CNT] = 0;
- CntVal[EXP_CNT] = 0;
- CntVal[LGKM_CNT] = 0;
- UseDefaultWaitcntStrategy = false;
- }
-
- if (UseDefaultWaitcntStrategy) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- if (EmitWaitcnt & CNT_MASK(T)) {
- int Delta =
- ScoreBrackets->getScoreUB(T) - ScoreBrackets->getScoreLB(T);
- int MaxDelta = ScoreBrackets->getWaitCountMax(T);
- if (Delta >= MaxDelta) {
- Delta = -1;
- if (T != EXP_CNT) {
- ScoreBrackets->setScoreLB(
- T, ScoreBrackets->getScoreUB(T) - MaxDelta);
- }
- EmitWaitcnt &= ~CNT_MASK(T);
- }
- CntVal[T] = Delta;
- } else {
- // If we are not waiting for a particular counter then encode
- // it as -1 which means "don't care."
- CntVal[T] = -1;
- }
+ // Early-out if no wait is indicated.
+ if (!ScoreBrackets.simplifyWaitcnt(Wait) && !IsForceEmitWaitcnt) {
+ bool Modified = false;
+ if (OldWaitcntInstr) {
+ if (TrackedWaitcntSet.count(OldWaitcntInstr)) {
+ TrackedWaitcntSet.erase(OldWaitcntInstr);
+ OldWaitcntInstr->eraseFromParent();
+ Modified = true;
+ } else {
+ int64_t Imm = OldWaitcntInstr->getOperand(0).getImm();
+ ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
}
+ Modified = true;
}
+ return Modified;
+ }
- // If we are not waiting on any counter we can skip the wait altogether.
- if (EmitWaitcnt != 0 || IsForceEmitWaitcnt) {
- MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
- int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
- if (!OldWaitcnt ||
- (AMDGPU::decodeVmcnt(IV, Imm) !=
- (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
- (AMDGPU::decodeExpcnt(IV, Imm) !=
- (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
- (AMDGPU::decodeLgkmcnt(IV, Imm) !=
- (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
- MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
- if (ContainingLoop) {
- MachineBasicBlock *TBB = ContainingLoop->getHeader();
- BlockWaitcntBrackets *ScoreBracket =
- BlockWaitcntBracketsMap[TBB].get();
- if (!ScoreBracket) {
- assert(!BlockVisitedSet.count(TBB));
- BlockWaitcntBracketsMap[TBB] =
- llvm::make_unique<BlockWaitcntBrackets>(ST);
- ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
- }
- ScoreBracket->setRevisitLoop(true);
- LLVM_DEBUG(dbgs()
- << "set-revisit2: Block"
- << ContainingLoop->getHeader()->getNumber() << '\n';);
- }
- }
+ if (ForceEmitZeroWaitcnts)
+ Wait = AMDGPU::Waitcnt::allZero();
- // Update an existing waitcount, or make a new one.
- unsigned Enc = AMDGPU::encodeWaitcnt(IV,
- ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
- ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
- ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
- // We don't remove waitcnts that existed prior to the waitcnt
- // pass. Check if the waitcnt to-be-inserted can be avoided
- // or if the prev waitcnt can be updated.
- bool insertSWaitInst = true;
- for (MachineBasicBlock::iterator I = MI.getIterator(),
- B = MI.getParent()->begin();
- insertSWaitInst && I != B; --I) {
- if (I == MI.getIterator())
- continue;
+ if (ForceEmitWaitcnt[VM_CNT])
+ Wait.VmCnt = 0;
+ if (ForceEmitWaitcnt[EXP_CNT])
+ Wait.ExpCnt = 0;
+ if (ForceEmitWaitcnt[LGKM_CNT])
+ Wait.LgkmCnt = 0;
- switch (I->getOpcode()) {
- case AMDGPU::S_WAITCNT:
- if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
- insertSWaitInst = false;
- else if (!OldWaitcnt) {
- OldWaitcnt = &*I;
- Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
- }
- break;
- // TODO: skip over instructions which never require wait.
- }
- break;
- }
- if (insertSWaitInst) {
- if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
- if (ForceEmitZeroWaitcnts)
- LLVM_DEBUG(
- dbgs()
- << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
- if (IsForceEmitWaitcnt)
- LLVM_DEBUG(dbgs()
- << "Force emit a s_waitcnt due to debug counter\n");
-
- OldWaitcnt->getOperand(0).setImm(Enc);
- if (!OldWaitcnt->getParent())
- MI.getParent()->insert(MI, OldWaitcnt);
-
- LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
- << "Old Instr: " << MI << '\n'
- << "New Instr: " << *OldWaitcnt << '\n');
- } else {
- auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
- MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(Enc);
- TrackedWaitcntSet.insert(SWaitInst);
-
- LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
- << "Old Instr: " << MI << '\n'
- << "New Instr: " << *SWaitInst << '\n');
- }
- }
+ ScoreBrackets.applyWaitcnt(Wait);
- if (CntVal[EXP_CNT] == 0) {
- ScoreBrackets->setMixedExpTypes(false);
- }
- }
+ AMDGPU::Waitcnt OldWait;
+ if (OldWaitcntInstr) {
+ OldWait =
+ AMDGPU::decodeWaitcnt(IV, OldWaitcntInstr->getOperand(0).getImm());
}
-}
+ if (OldWait.dominates(Wait))
+ return false;
-void SIInsertWaitcnts::insertWaitcntBeforeCF(MachineBasicBlock &MBB,
- MachineInstr *Waitcnt) {
- if (MBB.empty()) {
- MBB.push_back(Waitcnt);
- return;
- }
+ if (OldWaitcntInstr && !TrackedWaitcntSet.count(OldWaitcntInstr))
+ Wait = Wait.combined(OldWait);
- MachineBasicBlock::iterator It = MBB.end();
- MachineInstr *MI = &*(--It);
- if (MI->isBranch()) {
- MBB.insert(It, Waitcnt);
+ unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+ if (OldWaitcntInstr) {
+ OldWaitcntInstr->getOperand(0).setImm(Enc);
+
+ LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
+ << "Old Instr: " << MI << '\n'
+ << "New Instr: " << *OldWaitcntInstr << '\n');
} else {
- MBB.push_back(Waitcnt);
+ auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
+ MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(Enc);
+ TrackedWaitcntSet.insert(SWaitInst);
+
+ LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
+ << "Old Instr: " << MI << '\n'
+ << "New Instr: " << *SWaitInst << '\n');
}
+
+ return true;
}
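The dominates/combined/encodeWaitcnt calls above rely on the decoded-waitcnt helper from the AMDGPU utility code. Below is a rough standalone model of the intended semantics (the field names follow the diff; the struct itself is a sketch, not the real AMDGPUBaseInfo definition): one wait dominates another when it is at least as strict for every counter, and combining two waits keeps the stricter count per field.

#include <algorithm>
#include <cassert>

// Standalone model of a decoded s_waitcnt; ~0u means "don't care".
struct WaitcntModel {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;

  // A wait dominates another if it is at least as strict for every counter.
  bool dominates(const WaitcntModel &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt;
  }

  // Combining two waits keeps the stricter (smaller) count per field.
  WaitcntModel combined(const WaitcntModel &Other) const {
    WaitcntModel W;
    W.VmCnt = std::min(VmCnt, Other.VmCnt);
    W.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    W.LgkmCnt = std::min(LgkmCnt, Other.LgkmCnt);
    return W;
  }
};

int main() {
  WaitcntModel Old;             // pre-existing s_waitcnt
  Old.VmCnt = 0;
  WaitcntModel Needed;          // what the score brackets require now
  Needed.VmCnt = 2;
  assert(Old.dominates(Needed));   // the old wait already covers the need
  Needed.LgkmCnt = 0;
  assert(!Old.dominates(Needed));  // now lgkmcnt must also be waited on
  WaitcntModel Merged = Old.combined(Needed);
  assert(Merged.VmCnt == 0 && Merged.LgkmCnt == 0);
  return 0;
}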
// This is a flat memory operation. Check to see if it has memory
@@ -1305,15 +1079,15 @@ bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const {
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
- if (AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS)
return true;
}
return false;
}
-void SIInsertWaitcnts::updateEventWaitcntAfter(
- MachineInstr &Inst, BlockWaitcntBrackets *ScoreBrackets) {
+void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
+ WaitcntBrackets *ScoreBrackets) {
// Now look at the instruction opcode. If it is a memory access
// instruction, update the upper-bound of the appropriate counter's
// bracket and the destination operand scores.
@@ -1379,342 +1153,124 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(
}
}
-// Merge the score brackets of the Block's predecessors;
-// this merged score bracket is used when adding waitcnts to the Block
-void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
- BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
- int32_t MaxPending[NUM_INST_CNTS] = {0};
- int32_t MaxFlat[NUM_INST_CNTS] = {0};
- bool MixedExpTypes = false;
-
- // For single basic block loops, we need to retain the Block's
- // score bracket to have accurate Pred info. So, make a copy of Block's
- // score bracket, clear() it (which retains several important bits of info),
- // populate, and then replace en masse. For non-single basic block loops,
- // just clear Block's current score bracket and repopulate in-place.
- bool IsSelfPred;
- std::unique_ptr<BlockWaitcntBrackets> S;
-
- IsSelfPred = (std::find(Block.pred_begin(), Block.pred_end(), &Block))
- != Block.pred_end();
- if (IsSelfPred) {
- S = llvm::make_unique<BlockWaitcntBrackets>(*ScoreBrackets);
- ScoreBrackets = S.get();
- }
-
- ScoreBrackets->clear();
-
- // See if there are any uninitialized predecessors. If so, emit an
- // s_waitcnt 0 at the beginning of the block.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
- bool Visited = BlockVisitedSet.count(Pred);
- if (!Visited || PredScoreBrackets->getWaitAtBeginning()) {
- continue;
- }
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int span =
- PredScoreBrackets->getScoreUB(T) - PredScoreBrackets->getScoreLB(T);
- MaxPending[T] = std::max(MaxPending[T], span);
- span =
- PredScoreBrackets->pendingFlat(T) - PredScoreBrackets->getScoreLB(T);
- MaxFlat[T] = std::max(MaxFlat[T], span);
- }
-
- MixedExpTypes |= PredScoreBrackets->mixedExpTypes();
- }
-
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- // Also handle kills for exit block.
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int Span = KillWaitBrackets[I]->getScoreUB(T) -
- KillWaitBrackets[I]->getScoreLB(T);
- MaxPending[T] = std::max(MaxPending[T], Span);
- Span = KillWaitBrackets[I]->pendingFlat(T) -
- KillWaitBrackets[I]->getScoreLB(T);
- MaxFlat[T] = std::max(MaxFlat[T], Span);
- }
-
- MixedExpTypes |= KillWaitBrackets[I]->mixedExpTypes();
- }
- }
-
- // Special handling for GDS_GPR_LOCK and EXP_GPR_LOCK.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
- bool Visited = BlockVisitedSet.count(Pred);
- if (!Visited || PredScoreBrackets->getWaitAtBeginning()) {
- continue;
- }
-
- int GDSSpan = PredScoreBrackets->getEventUB(GDS_GPR_LOCK) -
- PredScoreBrackets->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
- int EXPSpan = PredScoreBrackets->getEventUB(EXP_GPR_LOCK) -
- PredScoreBrackets->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
- }
-
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- int GDSSpan = KillWaitBrackets[I]->getEventUB(GDS_GPR_LOCK) -
- KillWaitBrackets[I]->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
- int EXPSpan = KillWaitBrackets[I]->getEventUB(EXP_GPR_LOCK) -
- KillWaitBrackets[I]->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
- }
- }
-
-#if 0
- // LC does not (unlike) add a waitcnt at beginning. Leaving it as marker.
- // TODO: how does LC distinguish between function entry and main entry?
- // If this is the entry to a function, force a wait.
- MachineBasicBlock &Entry = Block.getParent()->front();
- if (Entry.getNumber() == Block.getNumber()) {
- ScoreBrackets->setWaitAtBeginning();
- return;
- }
-#endif
-
- // Now set the current Block's brackets to the largest ending bracket.
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- ScoreBrackets->setScoreUB(T, MaxPending[T]);
- ScoreBrackets->setScoreLB(T, 0);
- ScoreBrackets->setLastFlat(T, MaxFlat[T]);
- }
-
- ScoreBrackets->setMixedExpTypes(MixedExpTypes);
-
- // Set the register scoreboard.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- if (!BlockVisitedSet.count(Pred)) {
- continue;
- }
-
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
-
- // Now merge the gpr_reg_score information
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int PredLB = PredScoreBrackets->getScoreLB(T);
- int PredUB = PredScoreBrackets->getScoreUB(T);
- if (PredLB < PredUB) {
- int PredScale = MaxPending[T] - PredUB;
- // Merge vgpr scores.
- for (int J = 0; J <= PredScoreBrackets->getMaxVGPR(); J++) {
- int PredRegScore = PredScoreBrackets->getRegScore(J, T);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
- }
- // Also need to merge sgpr scores for lgkm_cnt.
- if (T == LGKM_CNT) {
- for (int J = 0; J <= PredScoreBrackets->getMaxSGPR(); J++) {
- int PredRegScore =
- PredScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J + NUM_ALL_VGPRS, LGKM_CNT,
- std::max(
- ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
- NewRegScore));
- }
- }
- }
- }
-
- // Also merge the WaitEvent information.
- ForAllWaitEventType(W) {
- enum InstCounterType T = PredScoreBrackets->eventCounter(W);
- int PredEventUB = PredScoreBrackets->getEventUB(W);
- if (PredEventUB > PredScoreBrackets->getScoreLB(T)) {
- int NewEventUB =
- MaxPending[T] + PredEventUB - PredScoreBrackets->getScoreUB(T);
- if (NewEventUB > 0) {
- ScoreBrackets->setEventUB(
- W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
- }
- }
- }
- }
-
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- // Set the register scoreboard.
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- // Now merge the gpr_reg_score information.
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int PredLB = KillWaitBrackets[I]->getScoreLB(T);
- int PredUB = KillWaitBrackets[I]->getScoreUB(T);
- if (PredLB < PredUB) {
- int PredScale = MaxPending[T] - PredUB;
- // Merge vgpr scores.
- for (int J = 0; J <= KillWaitBrackets[I]->getMaxVGPR(); J++) {
- int PredRegScore = KillWaitBrackets[I]->getRegScore(J, T);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
- }
- // Also need to merge sgpr scores for lgkm_cnt.
- if (T == LGKM_CNT) {
- for (int J = 0; J <= KillWaitBrackets[I]->getMaxSGPR(); J++) {
- int PredRegScore =
- KillWaitBrackets[I]->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J + NUM_ALL_VGPRS, LGKM_CNT,
- std::max(
- ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
- NewRegScore));
- }
- }
- }
- }
-
- // Also merge the WaitEvent information.
- ForAllWaitEventType(W) {
- enum InstCounterType T = KillWaitBrackets[I]->eventCounter(W);
- int PredEventUB = KillWaitBrackets[I]->getEventUB(W);
- if (PredEventUB > KillWaitBrackets[I]->getScoreLB(T)) {
- int NewEventUB =
- MaxPending[T] + PredEventUB - KillWaitBrackets[I]->getScoreUB(T);
- if (NewEventUB > 0) {
- ScoreBrackets->setEventUB(
- W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
- }
- }
- }
- }
- }
-
- // Special case handling of GDS_GPR_LOCK and EXP_GPR_LOCK. Merge this for the
- // sequencing predecessors, because changes to EXEC require waitcnts due to
- // the delayed nature of these operations.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- if (!BlockVisitedSet.count(Pred)) {
- continue;
- }
+bool WaitcntBrackets::mergeScore(const MergeInfo &M, uint32_t &Score,
+ uint32_t OtherScore) {
+ uint32_t MyShifted = Score <= M.OldLB ? 0 : Score + M.MyShift;
+ uint32_t OtherShifted =
+ OtherScore <= M.OtherLB ? 0 : OtherScore + M.OtherShift;
+ Score = std::max(MyShifted, OtherShifted);
+ return OtherShifted > MyShifted;
+}
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
-
- int pred_gds_ub = PredScoreBrackets->getEventUB(GDS_GPR_LOCK);
- if (pred_gds_ub > PredScoreBrackets->getScoreLB(EXP_CNT)) {
- int new_gds_ub = MaxPending[EXP_CNT] + pred_gds_ub -
- PredScoreBrackets->getScoreUB(EXP_CNT);
- if (new_gds_ub > 0) {
- ScoreBrackets->setEventUB(
- GDS_GPR_LOCK,
- std::max(ScoreBrackets->getEventUB(GDS_GPR_LOCK), new_gds_ub));
- }
- }
- int pred_exp_ub = PredScoreBrackets->getEventUB(EXP_GPR_LOCK);
- if (pred_exp_ub > PredScoreBrackets->getScoreLB(EXP_CNT)) {
- int new_exp_ub = MaxPending[EXP_CNT] + pred_exp_ub -
- PredScoreBrackets->getScoreUB(EXP_CNT);
- if (new_exp_ub > 0) {
- ScoreBrackets->setEventUB(
- EXP_GPR_LOCK,
- std::max(ScoreBrackets->getEventUB(EXP_GPR_LOCK), new_exp_ub));
+/// Merge the pending events and associated score brackets of \p Other into
+/// this bracket's status.
+///
+/// Returns whether the merge resulted in a change that requires tighter waits
+/// (i.e. the merged brackets strictly dominate the original brackets).
+bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
+ bool StrictDom = false;
+
+ for (auto T : inst_counter_types()) {
+ // Merge event flags for this counter
+ const bool OldOutOfOrder = counterOutOfOrder(T);
+ const uint32_t OldEvents = PendingEvents & WaitEventMaskForInst[T];
+ const uint32_t OtherEvents = Other.PendingEvents & WaitEventMaskForInst[T];
+ if (OtherEvents & ~OldEvents)
+ StrictDom = true;
+ if (Other.MixedPendingEvents[T] ||
+ (OldEvents && OtherEvents && OldEvents != OtherEvents))
+ MixedPendingEvents[T] = true;
+ PendingEvents |= OtherEvents;
+
+ // Merge scores for this counter
+ const uint32_t MyPending = ScoreUBs[T] - ScoreLBs[T];
+ const uint32_t OtherPending = Other.ScoreUBs[T] - Other.ScoreLBs[T];
+ MergeInfo M;
+ M.OldLB = ScoreLBs[T];
+ M.OtherLB = Other.ScoreLBs[T];
+ M.MyShift = OtherPending > MyPending ? OtherPending - MyPending : 0;
+ M.OtherShift = ScoreUBs[T] - Other.ScoreUBs[T] + M.MyShift;
+
+ const uint32_t NewUB = ScoreUBs[T] + M.MyShift;
+ if (NewUB < ScoreUBs[T])
+ report_fatal_error("waitcnt score overflow");
+ ScoreUBs[T] = NewUB;
+ ScoreLBs[T] = std::min(M.OldLB + M.MyShift, M.OtherLB + M.OtherShift);
+
+ StrictDom |= mergeScore(M, LastFlat[T], Other.LastFlat[T]);
+
+ bool RegStrictDom = false;
+ for (int J = 0, E = std::max(getMaxVGPR(), Other.getMaxVGPR()) + 1; J != E;
+ J++) {
+ RegStrictDom |= mergeScore(M, VgprScores[T][J], Other.VgprScores[T][J]);
+ }
+
+ if (T == LGKM_CNT) {
+ for (int J = 0, E = std::max(getMaxSGPR(), Other.getMaxSGPR()) + 1;
+ J != E; J++) {
+ RegStrictDom |= mergeScore(M, SgprScores[J], Other.SgprScores[J]);
}
}
- }
- // if a single block loop, update the score brackets. Not needed for other
- // blocks, as we did this in-place
- if (IsSelfPred) {
- BlockWaitcntBracketsMap[&Block] = llvm::make_unique<BlockWaitcntBrackets>(*ScoreBrackets);
+ if (RegStrictDom && !OldOutOfOrder)
+ StrictDom = true;
}
-}
-/// Return true if the given basic block is a "bottom" block of a loop.
-/// This works even if the loop is discontiguous. This also handles
-/// multiple back-edges for the same "header" block of a loop.
-bool SIInsertWaitcnts::isLoopBottom(const MachineLoop *Loop,
- const MachineBasicBlock *Block) {
- for (MachineBasicBlock *MBB : Loop->blocks()) {
- if (MBB == Block && MBB->isSuccessor(Loop->getHeader())) {
- return true;
- }
- }
- return false;
-}
+ VgprUB = std::max(getMaxVGPR(), Other.getMaxVGPR());
+ SgprUB = std::max(getMaxSGPR(), Other.getMaxSGPR());
-/// Count the number of "bottom" basic blocks of a loop.
-unsigned SIInsertWaitcnts::countNumBottomBlocks(const MachineLoop *Loop) {
- unsigned Count = 0;
- for (MachineBasicBlock *MBB : Loop->blocks()) {
- if (MBB->isSuccessor(Loop->getHeader())) {
- Count++;
- }
- }
- return Count;
+ return StrictDom;
}
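A worked example of the rebasing arithmetic in mergeScore/merge may help; the snippet below is a standalone replica with invented names, not part of the pass. With this bracket at LB=2/UB=5 and the other at LB=0/UB=5, MyShift and OtherShift both come out as 2, the merged window becomes LB=2/UB=7, and every surviving score keeps its distance from its original upper bound.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Standalone replica of the mergeScore arithmetic from the hunk above.
struct MergeInfoModel {
  uint32_t OldLB, OtherLB, MyShift, OtherShift;
};

static bool mergeScoreModel(const MergeInfoModel &M, uint32_t &Score,
                            uint32_t OtherScore) {
  uint32_t MyShifted = Score <= M.OldLB ? 0 : Score + M.MyShift;
  uint32_t OtherShifted =
      OtherScore <= M.OtherLB ? 0 : OtherScore + M.OtherShift;
  Score = std::max(MyShifted, OtherShifted);
  return OtherShifted > MyShifted;
}

int main() {
  // This bracket: LB=2, UB=5 (3 pending). Other bracket: LB=0, UB=5 (5 pending).
  uint32_t MyLB = 2, MyUB = 5, OtherLB = 0, OtherUB = 5;
  uint32_t MyPending = MyUB - MyLB, OtherPending = OtherUB - OtherLB;
  MergeInfoModel M;
  M.OldLB = MyLB;
  M.OtherLB = OtherLB;
  M.MyShift = OtherPending > MyPending ? OtherPending - MyPending : 0; // 2
  M.OtherShift = MyUB - OtherUB + M.MyShift;                           // 2
  uint32_t NewUB = MyUB + M.MyShift;                                   // 7
  uint32_t NewLB = std::min(MyLB + M.MyShift, OtherLB + M.OtherShift); // 2

  uint32_t RegScore = 4;            // register last written at score 4 here
  mergeScoreModel(M, RegScore, 3);  // and at score 3 in the other bracket
  // Distance to the upper bound is preserved: 5 - 4 == 7 - 6.
  assert(RegScore == 6 && NewUB == 7 && NewLB == 2);
  return 0;
}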
// Generate s_waitcnt instructions where needed.
-void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
- MachineBasicBlock &Block) {
- // Initialize the state information.
- mergeInputScoreBrackets(Block);
-
- BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
+bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
+ MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets) {
+ bool Modified = false;
LLVM_DEBUG({
dbgs() << "*** Block" << Block.getNumber() << " ***";
- ScoreBrackets->dump();
+ ScoreBrackets.dump();
});
// Walk over the instructions.
+ MachineInstr *OldWaitcntInstr = nullptr;
+
for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end();
Iter != E;) {
MachineInstr &Inst = *Iter;
+
// Remove any previously existing waitcnts.
if (Inst.getOpcode() == AMDGPU::S_WAITCNT) {
- // Leave pre-existing waitcnts, but note their existence via setWaitcnt.
- // Remove the waitcnt-pass-generated waitcnts; the pass will add them back
- // as needed.
- if (!TrackedWaitcntSet.count(&Inst))
- ++Iter;
- else {
- ++Iter;
- Inst.removeFromParent();
+ if (OldWaitcntInstr) {
+ if (TrackedWaitcntSet.count(OldWaitcntInstr)) {
+ TrackedWaitcntSet.erase(OldWaitcntInstr);
+ OldWaitcntInstr->eraseFromParent();
+ OldWaitcntInstr = nullptr;
+ } else if (!TrackedWaitcntSet.count(&Inst)) {
+ // Two successive s_waitcnt's, both of which are pre-existing and
+ // are therefore preserved.
+ int64_t Imm = OldWaitcntInstr->getOperand(0).getImm();
+ ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
+ } else {
+ ++Iter;
+ Inst.eraseFromParent();
+ Modified = true;
+ continue;
+ }
}
- ScoreBrackets->setWaitcnt(&Inst);
- continue;
- }
- // Kill instructions generate a conditional branch to the endmain block.
- // Merge the current waitcnt state into the endmain block information.
- // TODO: Are there other flavors of KILL instruction?
- if (Inst.getOpcode() == AMDGPU::KILL) {
- addKillWaitBracket(ScoreBrackets);
+ OldWaitcntInstr = &Inst;
+ ++Iter;
+ continue;
}
bool VCCZBugWorkAround = false;
if (readsVCCZ(Inst) &&
(!VCCZBugHandledSet.count(&Inst))) {
- if (ScoreBrackets->getScoreLB(LGKM_CNT) <
- ScoreBrackets->getScoreUB(LGKM_CNT) &&
- ScoreBrackets->hasPendingSMEM()) {
+ if (ScoreBrackets.getScoreLB(LGKM_CNT) <
+ ScoreBrackets.getScoreUB(LGKM_CNT) &&
+ ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
if (ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
VCCZBugWorkAround = true;
}
@@ -1722,9 +1278,10 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
// Generate an s_waitcnt instruction to be placed before
// cur_Inst, if needed.
- generateWaitcntInstBefore(Inst, ScoreBrackets);
+ Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr);
+ OldWaitcntInstr = nullptr;
- updateEventWaitcntAfter(Inst, ScoreBrackets);
+ updateEventWaitcntAfter(Inst, &ScoreBrackets);
#if 0 // TODO: implement resource type check controlled by options with ub = LB.
// If this instruction generates a S_SETVSKIP because it is an
@@ -1737,11 +1294,9 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
}
#endif
- ScoreBrackets->clearWaitcnt();
-
LLVM_DEBUG({
Inst.print(dbgs());
- ScoreBrackets->dump();
+ ScoreBrackets.dump();
});
// Check to see if this is a GWS instruction. If so, and if this is CI or
@@ -1753,10 +1308,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
// TODO: && context->target_info->GwsRequiresMemViolTest() ) {
- ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
- ScoreBrackets->updateByWait(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
- ScoreBrackets->updateByWait(LGKM_CNT,
- ScoreBrackets->getScoreUB(LGKM_CNT));
+ ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZero());
}
// TODO: Remove this work-around after fixing the scheduler and enable the
@@ -1769,71 +1321,13 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
AMDGPU::VCC)
.addReg(AMDGPU::VCC);
VCCZBugHandledSet.insert(&Inst);
+ Modified = true;
}
++Iter;
}
- // Check if we need to force convergence at loop footer.
- MachineLoop *ContainingLoop = MLI->getLoopFor(&Block);
- if (ContainingLoop && isLoopBottom(ContainingLoop, &Block)) {
- LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
- WaitcntData->print();
- LLVM_DEBUG(dbgs() << '\n';);
-
- // The iterative waitcnt insertion algorithm aims for optimal waitcnt
- // placement, but doesn't guarantee convergence for a loop. Each
- // loop should take at most (n+1) iterations for it to converge naturally,
- // where n is the number of bottom blocks. If this threshold is reached and
- // the result hasn't converged, then we force convergence by inserting
- // a s_waitcnt at the end of loop footer.
- if (WaitcntData->getIterCnt() > (countNumBottomBlocks(ContainingLoop) + 1)) {
- // To ensure convergence, need to make wait events at loop footer be no
- // more than those from the previous iteration.
- // As a simplification, instead of tracking individual scores and
- // generating the precise wait count, just wait on 0.
- bool HasPending = false;
- MachineInstr *SWaitInst = WaitcntData->getWaitcnt();
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- HasPending = true;
- break;
- }
- }
-
- if (HasPending) {
- if (!SWaitInst) {
- SWaitInst = BuildMI(Block, Block.getFirstNonPHI(),
- DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(0);
- TrackedWaitcntSet.insert(SWaitInst);
-#if 0 // TODO: Format the debug output
- OutputTransformBanner("insertWaitcntInBlock",0,"Create:",context);
- OutputTransformAdd(SWaitInst, context);
-#endif
- }
-#if 0 // TODO: ??
- _DEV( REPORTED_STATS->force_waitcnt_converge = 1; )
-#endif
- }
-
- if (SWaitInst) {
- LLVM_DEBUG({
- SWaitInst->print(dbgs());
- dbgs() << "\nAdjusted score board:";
- ScoreBrackets->dump();
- });
-
- // Add this waitcnt to the block. It is either newly created or
- // created in previous iterations and added back since block traversal
- // always removes waitcnts.
- insertWaitcntBeforeCF(Block, SWaitInst);
- WaitcntData->setWaitcnt(SWaitInst);
- }
- }
- }
+ return Modified;
}
bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
@@ -1841,14 +1335,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
- MLI = &getAnalysis<MachineLoopInfo>();
- IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
+ IV = AMDGPU::getIsaVersion(ST->getCPU());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- AMDGPUASI = ST->getAMDGPUAS();
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1))
+ for (auto T : inst_counter_types())
ForceEmitWaitcnt[T] = false;
HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
@@ -1868,93 +1359,70 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1;
TrackedWaitcntSet.clear();
- BlockVisitedSet.clear();
VCCZBugHandledSet.clear();
- LoopWaitcntDataMap.clear();
- BlockWaitcntProcessedSet.clear();
+ RpotIdxMap.clear();
+ BlockInfos.clear();
+
+ // Keep iterating over the blocks in reverse post order, inserting and
+ // updating s_waitcnt where needed, until a fix point is reached.
+ for (MachineBasicBlock *MBB :
+ ReversePostOrderTraversal<MachineFunction *>(&MF)) {
+ RpotIdxMap[MBB] = BlockInfos.size();
+ BlockInfos.emplace_back(MBB);
+ }
- // Walk over the blocks in reverse post-dominator order, inserting
- // s_waitcnt where needed.
- ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ std::unique_ptr<WaitcntBrackets> Brackets;
bool Modified = false;
- for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
- I = RPOT.begin(),
- E = RPOT.end(), J = RPOT.begin();
- I != E;) {
- MachineBasicBlock &MBB = **I;
-
- BlockVisitedSet.insert(&MBB);
-
- BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
- if (!ScoreBrackets) {
- BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>(ST);
- ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
- }
- ScoreBrackets->setPostOrder(MBB.getNumber());
- MachineLoop *ContainingLoop = MLI->getLoopFor(&MBB);
- if (ContainingLoop && LoopWaitcntDataMap[ContainingLoop] == nullptr)
- LoopWaitcntDataMap[ContainingLoop] = llvm::make_unique<LoopWaitcntData>();
-
- // If we are walking into the block from before the loop, then guarantee
- // at least 1 re-walk over the loop to propagate the information, even if
- // no S_WAITCNT instructions were generated.
- if (ContainingLoop && ContainingLoop->getHeader() == &MBB) {
- unsigned Count = countNumBottomBlocks(ContainingLoop);
-
- // If the loop has multiple back-edges, and so more than one "bottom"
- // basic block, we have to guarantee a re-walk over every blocks.
- if ((std::count(BlockWaitcntProcessedSet.begin(),
- BlockWaitcntProcessedSet.end(), &MBB) < (int)Count)) {
- BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true);
- LLVM_DEBUG(dbgs() << "set-revisit1: Block"
- << ContainingLoop->getHeader()->getNumber() << '\n';);
+ bool Repeat;
+ do {
+ Repeat = false;
+
+ for (BlockInfo &BI : BlockInfos) {
+ if (!BI.Dirty)
+ continue;
+
+ unsigned Idx = std::distance(&*BlockInfos.begin(), &BI);
+
+ if (BI.Incoming) {
+ if (!Brackets)
+ Brackets = llvm::make_unique<WaitcntBrackets>(*BI.Incoming);
+ else
+ *Brackets = *BI.Incoming;
+ } else {
+ if (!Brackets)
+ Brackets = llvm::make_unique<WaitcntBrackets>(ST);
+ else
+ Brackets->clear();
}
- }
- // Walk over the instructions.
- insertWaitcntInBlock(MF, MBB);
-
- // Record that waitcnts have been processed at least once for this block.
- BlockWaitcntProcessedSet.push_back(&MBB);
-
- // See if we want to revisit the loop. If a loop has multiple back-edges,
- // we shouldn't revisit the same "bottom" basic block.
- if (ContainingLoop && isLoopBottom(ContainingLoop, &MBB) &&
- std::count(BlockWaitcntProcessedSet.begin(),
- BlockWaitcntProcessedSet.end(), &MBB) == 1) {
- MachineBasicBlock *EntryBB = ContainingLoop->getHeader();
- BlockWaitcntBrackets *EntrySB = BlockWaitcntBracketsMap[EntryBB].get();
- if (EntrySB && EntrySB->getRevisitLoop()) {
- EntrySB->setRevisitLoop(false);
- J = I;
- int32_t PostOrder = EntrySB->getPostOrder();
- // TODO: Avoid this loop. Find another way to set I.
- for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
- X = RPOT.begin(),
- Y = RPOT.end();
- X != Y; ++X) {
- MachineBasicBlock &MBBX = **X;
- if (MBBX.getNumber() == PostOrder) {
- I = X;
- break;
+ Modified |= insertWaitcntInBlock(MF, *BI.MBB, *Brackets);
+ BI.Dirty = false;
+
+ if (Brackets->hasPending()) {
+ BlockInfo *MoveBracketsToSucc = nullptr;
+ for (MachineBasicBlock *Succ : BI.MBB->successors()) {
+ unsigned SuccIdx = RpotIdxMap[Succ];
+ BlockInfo &SuccBI = BlockInfos[SuccIdx];
+ if (!SuccBI.Incoming) {
+ SuccBI.Dirty = true;
+ if (SuccIdx <= Idx)
+ Repeat = true;
+ if (!MoveBracketsToSucc) {
+ MoveBracketsToSucc = &SuccBI;
+ } else {
+ SuccBI.Incoming = llvm::make_unique<WaitcntBrackets>(*Brackets);
+ }
+ } else if (SuccBI.Incoming->merge(*Brackets)) {
+ SuccBI.Dirty = true;
+ if (SuccIdx <= Idx)
+ Repeat = true;
}
}
- LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
- WaitcntData->incIterCnt();
- LLVM_DEBUG(dbgs() << "revisit: Block" << EntryBB->getNumber() << '\n';);
- continue;
- } else {
- LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
- // Loop converged, reset iteration count. If this loop gets revisited,
- // it must be from an outer loop, the counter will restart, this will
- // ensure we don't force convergence on such revisits.
- WaitcntData->resetIterCnt();
+ if (MoveBracketsToSucc)
+ MoveBracketsToSucc->Incoming = std::move(Brackets);
}
}
-
- J = I;
- ++I;
- }
+ } while (Repeat);
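The do/while above is a forward dataflow fix point over reverse post order: a block is reprocessed only when a predecessor pushed new state into it, and another outer pass is needed only when that happens across a back edge (successor RPO index <= current index). Below is a minimal standalone model of that driver, with a toy bitmask state standing in for WaitcntBrackets and invented names throughout.

#include <cassert>
#include <cstddef>
#include <vector>

// Minimal model of the RPO fix-point driver: state is a bitmask of "facts",
// the transfer function ORs in one bit per block, and a successor is only
// re-marked dirty when it learns new bits.
struct BlockModel {
  std::vector<size_t> Succs; // successor indices in RPO order
  bool Dirty = true;
  unsigned Incoming = 0;
};

static unsigned transfer(size_t Idx, unsigned In) { return In | (1u << Idx); }

static void runFixPoint(std::vector<BlockModel> &Blocks) {
  bool Repeat;
  do {
    Repeat = false;
    for (size_t Idx = 0; Idx < Blocks.size(); ++Idx) {
      if (!Blocks[Idx].Dirty)
        continue;
      Blocks[Idx].Dirty = false;
      unsigned Out = transfer(Idx, Blocks[Idx].Incoming);
      for (size_t SuccIdx : Blocks[Idx].Succs) {
        if (Out & ~Blocks[SuccIdx].Incoming) { // merge learned something new
          Blocks[SuccIdx].Incoming |= Out;
          Blocks[SuccIdx].Dirty = true;
          if (SuccIdx <= Idx) // back edge: another outer pass is required
            Repeat = true;
        }
      }
    }
  } while (Repeat);
}

int main() {
  // Two-block loop 0 -> 1 -> 0: converges in two outer passes because the
  // merge is monotonic and the state space is finite.
  std::vector<BlockModel> Blocks(2);
  Blocks[0].Succs = {1};
  Blocks[1].Succs = {0};
  runFixPoint(Blocks);
  assert(Blocks[0].Incoming == 3 && Blocks[1].Incoming == 3);
  return 0;
}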
SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index b73d30940fc3..65ffc27b8b60 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -121,6 +121,10 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that this is a D16 buffer instruction.
field bit D16Buf = 0;
+ // This bit indicates that this instruction uses the floating-point
+ // double-precision rounding mode flags.
+ field bit FPDPRounding = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -178,6 +182,8 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{50} = D16Buf;
+ let TSFlags{51} = FPDPRounding;
+
let SchedRW = [Write32Bit];
field bits<1> DisableSIDecoder = 0;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f3745382a6f4..2370d5fa7b27 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -264,9 +265,10 @@ static bool isStride64(unsigned Opc) {
}
}
-bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
+ MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
unsigned Opc = LdSt.getOpcode();
if (isDS(LdSt)) {
@@ -274,11 +276,10 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
getNamedOperand(LdSt, AMDGPU::OpName::offset);
if (OffsetImm) {
// Normal, single offset LDS instruction.
- const MachineOperand *AddrReg =
- getNamedOperand(LdSt, AMDGPU::OpName::addr);
-
- BaseReg = AddrReg->getReg();
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
Offset = OffsetImm->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
@@ -309,10 +310,10 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
if (isStride64(Opc))
EltSize *= 64;
- const MachineOperand *AddrReg =
- getNamedOperand(LdSt, AMDGPU::OpName::addr);
- BaseReg = AddrReg->getReg();
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
Offset = EltSize * Offset0;
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
@@ -324,19 +325,20 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
if (SOffset && SOffset->isReg())
return false;
- const MachineOperand *AddrReg =
- getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (!AddrReg)
return false;
const MachineOperand *OffsetImm =
getNamedOperand(LdSt, AMDGPU::OpName::offset);
- BaseReg = AddrReg->getReg();
+ BaseOp = AddrReg;
Offset = OffsetImm->getImm();
if (SOffset) // soffset can be an inline immediate.
Offset += SOffset->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
@@ -346,36 +348,46 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
if (!OffsetImm)
return false;
- const MachineOperand *SBaseReg =
- getNamedOperand(LdSt, AMDGPU::OpName::sbase);
- BaseReg = SBaseReg->getReg();
+ MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
+ BaseOp = SBaseReg;
Offset = OffsetImm->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
if (isFLAT(LdSt)) {
- const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (VAddr) {
// Can't analyze 2 offsets.
if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
return false;
- BaseReg = VAddr->getReg();
+ BaseOp = VAddr;
} else {
// scratch instructions have either vaddr or saddr.
- BaseReg = getNamedOperand(LdSt, AMDGPU::OpName::saddr)->getReg();
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
}
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
return false;
}
-static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1,
- const MachineInstr &MI2, unsigned BaseReg2) {
- if (BaseReg1 == BaseReg2)
+static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
+ const MachineOperand &BaseOp1,
+ const MachineInstr &MI2,
+ const MachineOperand &BaseOp2) {
+ // Support only base operands with base registers.
+ // Note: this could be extended to support FI operands.
+ if (!BaseOp1.isReg() || !BaseOp2.isReg())
+ return false;
+
+ if (BaseOp1.isIdenticalTo(BaseOp2))
return true;
if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
@@ -401,12 +413,13 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1,
return Base1 == Base2;
}
-bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
- unsigned BaseReg1,
- MachineInstr &SecondLdSt,
- unsigned BaseReg2,
+bool SIInstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
+ MachineOperand &BaseOp2,
unsigned NumLoads) const {
- if (!memOpsHaveSameBasePtr(FirstLdSt, BaseReg1, SecondLdSt, BaseReg2))
+ MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ MachineInstr &SecondLdSt = *BaseOp2.getParent();
+
+ if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOp1, SecondLdSt, BaseOp2))
return false;
const MachineOperand *FirstDst = nullptr;
@@ -863,7 +876,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const DebugLoc &DL = MBB.findDebugLoc(MI);
unsigned Size = FrameInfo.getObjectSize(FrameIndex);
unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
@@ -907,16 +920,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}
- if (!ST.isVGPRSpillingEnabled(MF->getFunction())) {
- LLVMContext &Ctx = MF->getFunction().getContext();
- Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
- " spill register");
- BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
- .addReg(SrcReg);
-
- return;
- }
-
assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
@@ -972,9 +975,9 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const DebugLoc &DL = MBB.findDebugLoc(MI);
unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
unsigned Size = FrameInfo.getObjectSize(FrameIndex);
unsigned SpillSize = TRI->getSpillSize(*RC);
@@ -986,6 +989,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
PtrInfo, MachineMemOperand::MOLoad, Size, Align);
if (RI.isSGPRClass(RC)) {
+ MFI->setHasSpilledSGPRs();
+
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
@@ -1009,15 +1014,6 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
- if (!ST.isVGPRSpillingEnabled(MF->getFunction())) {
- LLVMContext &Ctx = MF->getFunction().getContext();
- Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
- " restore register");
- BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
-
- return;
- }
-
assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
@@ -1036,7 +1032,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const DebugLoc &DL = MBB.findDebugLoc(MI);
unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
unsigned WavefrontSize = ST.getWavefrontSize();
@@ -1044,7 +1040,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
if (!MFI->hasCalculatedTID()) {
MachineBasicBlock &Entry = MBB.getParent()->front();
MachineBasicBlock::iterator Insert = Entry.front();
- DebugLoc DL = Insert->getDebugLoc();
+ const DebugLoc &DL = Insert->getDebugLoc();
TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
*MF);
@@ -1421,10 +1417,15 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
unsigned &SrcOpIdx1) const {
- if (!MI.isCommutable())
+ return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
+}
+
+bool SIInstrInfo::findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0,
+ unsigned &SrcOpIdx1) const {
+ if (!Desc.isCommutable())
return false;
- unsigned Opc = MI.getOpcode();
+ unsigned Opc = Desc.getOpcode();
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
return false;
@@ -1549,8 +1550,9 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// buzz;
RS->enterBasicBlockEnd(MBB);
- unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass,
- MachineBasicBlock::iterator(GetPC), 0);
+ unsigned Scav = RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_64RegClass,
+ MachineBasicBlock::iterator(GetPC), false, 0);
MRI.replaceRegWith(PCReg, Scav);
MRI.clearVirtRegs();
RS->setRegUsed(Scav);
@@ -1644,7 +1646,34 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
MachineBasicBlock::iterator I = MBB.getFirstTerminator();
- if (I == MBB.end())
+ auto E = MBB.end();
+ if (I == E)
+ return false;
+
+ // Skip over the instructions that are artificially terminators for special
+ // exec management.
+ while (I != E && !I->isBranch() && !I->isReturn() &&
+ I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
+ switch (I->getOpcode()) {
+ case AMDGPU::SI_MASK_BRANCH:
+ case AMDGPU::S_MOV_B64_term:
+ case AMDGPU::S_XOR_B64_term:
+ case AMDGPU::S_ANDN2_B64_term:
+ break;
+ case AMDGPU::SI_IF:
+ case AMDGPU::SI_ELSE:
+ case AMDGPU::SI_KILL_I1_TERMINATOR:
+ case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
+ // FIXME: It's messy that these need to be considered here at all.
+ return true;
+ default:
+ llvm_unreachable("unexpected non-branch terminator inst");
+ }
+
+ ++I;
+ }
+
+ if (I == E)
return false;
if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
@@ -1933,20 +1962,20 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
}
unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const {
+ unsigned Kind) const {
switch(Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
- return ST.getAMDGPUAS().PRIVATE_ADDRESS;
+ return AMDGPUAS::PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
- return ST.getAMDGPUAS().CONSTANT_ADDRESS;
+ return AMDGPUAS::CONSTANT_ADDRESS;
}
- return ST.getAMDGPUAS().FLAT_ADDRESS;
+ return AMDGPUAS::FLAT_ADDRESS;
}
static void removeModOperands(MachineInstr &MI) {
@@ -2066,12 +2095,40 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Src2->isReg() && Src2->getReg() == Reg) {
// Not allowed to use constant bus for another operand.
// We can however allow an inline immediate as src0.
- if (!Src0->isImm() &&
- (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
- return false;
+ bool Src0Inlined = false;
+ if (Src0->isReg()) {
+ // Try to inline constant if possible.
+ // If the Def moves immediate and the use is single
+ // We are saving VGPR here.
+ MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
+ if (Def && Def->isMoveImmediate() &&
+ isInlineConstant(Def->getOperand(1)) &&
+ MRI->hasOneUse(Src0->getReg())) {
+ Src0->ChangeToImmediate(Def->getOperand(1).getImm());
+ Src0Inlined = true;
+ } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
+ (RI.isVirtualRegister(Src0->getReg()) &&
+ RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
+ return false;
+ // VGPR is okay as Src0 - fallthrough
+ }
- if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
- return false;
+ if (Src1->isReg() && !Src0Inlined ) {
+ // We have one slot for inlinable constant so far - try to fill it
+ MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
+ if (Def && Def->isMoveImmediate() &&
+ isInlineConstant(Def->getOperand(1)) &&
+ MRI->hasOneUse(Src1->getReg()) &&
+ commuteInstruction(UseMI)) {
+ Src0->ChangeToImmediate(Def->getOperand(1).getImm());
+ } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
+ (RI.isVirtualRegister(Src1->getReg()) &&
+ RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ return false;
+ // VGPR is okay as Src1 - fallthrough
+ }
const int64_t Imm = ImmOp->getImm();
@@ -2117,11 +2174,13 @@ static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
MachineInstr &MIb) const {
- unsigned BaseReg0, BaseReg1;
+ MachineOperand *BaseOp0, *BaseOp1;
int64_t Offset0, Offset1;
- if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
- getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
+ if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
+ getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) {
+ if (!BaseOp0->isIdenticalTo(*BaseOp1))
+ return false;
if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
// FIXME: Handle ds_read2 / ds_write2.
@@ -2129,8 +2188,7 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
}
unsigned Width0 = (*MIa.memoperands_begin())->getSize();
unsigned Width1 = (*MIb.memoperands_begin())->getSize();
- if (BaseReg0 == BaseReg1 &&
- offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
+ if (offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
return true;
}
}
@@ -2398,8 +2456,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
int32_t Trunc = static_cast<int32_t>(Imm);
- return Trunc == Imm &&
- AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
+ return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
}
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -2523,6 +2580,115 @@ bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
hasModifiersSet(MI, AMDGPU::OpName::omod);
}
+bool SIInstrInfo::canShrink(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+ // Can't shrink instruction with three operands.
+ // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
+ // a special case for it. It can only be shrunk if the third operand
+ // is vcc. We should handle this the same way we handle vopc, by adding
+ // a register allocation hint pre-regalloc and then do the shrinking
+ // post-regalloc.
+ if (Src2) {
+ switch (MI.getOpcode()) {
+ default: return false;
+
+ case AMDGPU::V_ADDC_U32_e64:
+ case AMDGPU::V_SUBB_U32_e64:
+ case AMDGPU::V_SUBBREV_U32_e64: {
+ const MachineOperand *Src1
+ = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()))
+ return false;
+ // Additional verification is needed for sdst/src2.
+ return true;
+ }
+ case AMDGPU::V_MAC_F32_e64:
+ case AMDGPU::V_MAC_F16_e64:
+ case AMDGPU::V_FMAC_F32_e64:
+ if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
+ return false;
+ break;
+
+ case AMDGPU::V_CNDMASK_B32_e64:
+ break;
+ }
+ }
+
+ const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
+ return false;
+
+ // We don't need to check src0; all input types are legal, so just make sure
+ // src0 isn't using any modifiers.
+ if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
+ return false;
+
+ // Can it be shrunk to a valid 32-bit opcode?
+ if (!hasVALU32BitEncoding(MI.getOpcode()))
+ return false;
+
+ // Check output modifiers
+ return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
+ !hasModifiersSet(MI, AMDGPU::OpName::clamp);
+}
+
+// Set VCC operand with all flags from \p Orig, except for setting it as
+// implicit.
+static void copyFlagsToImplicitVCC(MachineInstr &MI,
+ const MachineOperand &Orig) {
+
+ for (MachineOperand &Use : MI.implicit_operands()) {
+ if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
+ Use.setIsUndef(Orig.isUndef());
+ Use.setIsKill(Orig.isKill());
+ return;
+ }
+ }
+}
+
+MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
+ unsigned Op32) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineInstrBuilder Inst32 =
+ BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
+
+ // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
+ // For VOPC instructions, this is replaced by an implicit def of vcc.
+ int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
+ if (Op32DstIdx != -1) {
+ // dst
+ Inst32.add(MI.getOperand(0));
+ } else {
+ assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+ "Unexpected case");
+ }
+
+ Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));
+
+ const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1)
+ Inst32.add(*Src1);
+
+ const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+
+ if (Src2) {
+ int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+ if (Op32Src2Idx != -1) {
+ Inst32.add(*Src2);
+ } else {
+ // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+ // replaced with an implicit read of vcc. This was already added
+ // during the initial BuildMI, so find it to preserve the flags.
+ copyFlagsToImplicitVCC(*Inst32, *Src2);
+ }
+ }
+
+ return Inst32;
+}
+
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
const MachineOperand &MO,
const MCOperandInfo &OpInfo) const {
@@ -2806,6 +2972,42 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ // Verify MIMG
+ if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
+ // Ensure that the return type used is large enough for all the options
+ // being used. TFE/LWE require an extra result register.
+ const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask);
+ if (DMask) {
+ uint64_t DMaskImm = DMask->getImm();
+ uint32_t RegCount =
+ isGather4(MI.getOpcode()) ? 4 : countPopulation(DMaskImm);
+ const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe);
+ const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe);
+ const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16);
+
+ // Adjust for packed 16-bit values
+ if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
+ RegCount >>= 1;
+
+ // Adjust if using LWE or TFE
+ if ((LWE && LWE->getImm()) || (TFE && TFE->getImm()))
+ RegCount += 1;
+
+ const uint32_t DstIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
+ const MachineOperand &Dst = MI.getOperand(DstIdx);
+ if (Dst.isReg()) {
+ const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx);
+ uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
+ if (RegCount > DstSize) {
+ ErrInfo = "MIMG instruction returns too many registers for dst "
+ "register class";
+ return false;
+ }
+ }
+ }
+ }
+
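The verifier arithmetic above can be read as a small standalone rule: popcount of dmask (a fixed 4 for gather4), halved for packed D16, plus one register when TFE or LWE is set. A minimal sketch of that rule over plain values (the function name is invented for illustration):

#include <bitset>
#include <cassert>
#include <cstdint>

// Expected number of 32-bit result registers for a MIMG load, mirroring the
// check above with plain values instead of machine operands.
static unsigned mimgResultRegs(uint64_t DMask, bool IsGather4, bool D16Packed,
                               bool TFEorLWE) {
  unsigned Regs =
      IsGather4 ? 4u : static_cast<unsigned>(std::bitset<64>(DMask).count());
  if (D16Packed)
    Regs >>= 1; // two packed 16-bit components share one register
  if (TFEorLWE)
    Regs += 1;  // extra register for the TFE/LWE status dword
  return Regs;
}

int main() {
  assert(mimgResultRegs(0xF, false, false, false) == 4); // four-channel load
  assert(mimgResultRegs(0xF, false, true, true) == 3);   // packed D16 plus TFE
  return 0;
}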
// Verify VOP*. Ignore multiple sgpr operands on writelane.
if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32
&& (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI))) {
@@ -3001,6 +3203,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
+ case AMDGPU::S_XNOR_B32:
+ return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
@@ -3438,8 +3642,13 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
// pointer value is uniform.
MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
- unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
- SBase->setReg(SGPR);
+ unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
+ SBase->setReg(SGPR);
+ }
+ MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);
+ if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {
+ unsigned SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
+ SOff->setReg(SGPR);
}
}
@@ -3475,7 +3684,191 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
FoldImmediate(*Copy, *Def, OpReg, &MRI);
}
-void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
+// Emit the actual waterfall loop, executing the wrapped instruction for each
+// unique value of \p Rsrc across all lanes. In the best case we execute one
+// iteration; in the worst case we execute 64 (once per lane).
+static void
+emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
+ MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
+ const DebugLoc &DL, MachineOperand &Rsrc) {
+ MachineBasicBlock::iterator I = LoopBB.begin();
+
+ unsigned VRsrc = Rsrc.getReg();
+ unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
+
+ unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned CondReg0 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned CondReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned AndCond = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+ // Beginning of the loop, read the next Rsrc variant.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub0);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub1)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub1);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub2)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub2);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub3)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub3);
+
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc)
+ .addReg(SRsrcSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(SRsrcSub1)
+ .addImm(AMDGPU::sub1)
+ .addReg(SRsrcSub2)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcSub3)
+ .addImm(AMDGPU::sub3);
+
+ // Update Rsrc operand to use the SGPR Rsrc.
+ Rsrc.setReg(SRsrc);
+ Rsrc.setIsKill(true);
+
+ // Identify all lanes with identical Rsrc operands in their VGPRs.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg0)
+ .addReg(SRsrc, 0, AMDGPU::sub0_sub1)
+ .addReg(VRsrc, 0, AMDGPU::sub0_sub1);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
+ .addReg(SRsrc, 0, AMDGPU::sub2_sub3)
+ .addReg(VRsrc, 0, AMDGPU::sub2_sub3);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_B64), AndCond)
+ .addReg(CondReg0)
+ .addReg(CondReg1);
+
+ MRI.setSimpleHint(SaveExec, AndCond);
+
+ // Update EXEC to matching lanes, saving original to SaveExec.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_SAVEEXEC_B64), SaveExec)
+ .addReg(AndCond, RegState::Kill);
+
+ // The original instruction is here; we insert the terminators after it.
+ I = LoopBB.end();
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(SaveExec);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
+}
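The loop body above is easier to follow in scalar form: each iteration takes the descriptor of the first still-active lane, lets every lane holding that same descriptor run, then retires those lanes. The simulation below models only that control flow over a 64-lane wave with plain integers; the lane values and three-descriptor example are invented for illustration.

#include <array>
#include <bitset>
#include <cstdint>
#include <cstdio>

int main() {
  // Per-lane "Rsrc" values; in the worst case all 64 are distinct.
  std::array<uint64_t, 64> Rsrc = {};
  for (unsigned L = 0; L < 64; ++L)
    Rsrc[L] = L % 3; // three unique descriptors -> three iterations

  std::bitset<64> Exec;
  Exec.set(); // all lanes active

  unsigned Iterations = 0;
  while (Exec.any()) {
    // v_readfirstlane: take the value held by the lowest active lane.
    unsigned First = 0;
    while (!Exec[First])
      ++First;
    uint64_t Scalar = Rsrc[First];

    // v_cmp_eq + s_and_saveexec: the wrapped op runs for matching lanes...
    std::bitset<64> Match;
    for (unsigned L = 0; L < 64; ++L)
      Match[L] = Exec[L] && Rsrc[L] == Scalar;

    // ...and s_xor then clears the lanes that were just handled.
    Exec ^= Match;
    ++Iterations;
  }
  std::printf("waterfall iterations: %u\n", Iterations); // prints 3
  return 0;
}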
+
+// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
+// with SGPRs by iterating over all unique values across all lanes.
+static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
+ MachineOperand &Rsrc, MachineDominatorTree *MDT) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineBasicBlock::iterator I(&MI);
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+ // Save the EXEC mask
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B64), SaveExec)
+ .addReg(AMDGPU::EXEC);
+
+ // Killed uses in the instruction we are waterfalling around will be
+ // incorrect due to the added control-flow.
+ for (auto &MO : MI.uses()) {
+ if (MO.isReg() && MO.isUse()) {
+ MRI.clearKillFlags(MO.getReg());
+ }
+ }
+
+ // To insert the loop we need to split the block. Move everything after this
+ // point to a new block, and insert a new empty block between the two.
+ MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+
+ MF.insert(MBBI, LoopBB);
+ MF.insert(MBBI, RemainderBB);
+
+ LoopBB->addSuccessor(LoopBB);
+ LoopBB->addSuccessor(RemainderBB);
+
+ // Move MI to the LoopBB, and the remainder of the block to RemainderBB.
+ MachineBasicBlock::iterator J = I++;
+ RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+ LoopBB->splice(LoopBB->begin(), &MBB, J);
+
+ MBB.addSuccessor(LoopBB);
+
+ // Update dominators. We know that MBB immediately dominates LoopBB, that
+ // LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
+ // dominates all of the successors transferred to it from MBB that MBB used
+ // to dominate.
+ if (MDT) {
+ MDT->addNewBlock(LoopBB, &MBB);
+ MDT->addNewBlock(RemainderBB, LoopBB);
+ for (auto &Succ : RemainderBB->successors()) {
+ if (MDT->dominates(&MBB, Succ)) {
+ MDT->changeImmediateDominator(Succ, RemainderBB);
+ }
+ }
+ }
+
+ emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc);
+
+ // Restore the EXEC mask
+ MachineBasicBlock::iterator First = RemainderBB->begin();
+ BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(SaveExec);
+}
+
+// Extract the 64-bit base pointer from Rsrc and return a replacement Rsrc
+// that has a zero base pointer and the default buffer data format.
+static std::tuple<unsigned, unsigned>
+extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Extract the ptr from the resource descriptor.
+ unsigned RsrcPtr =
+ TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
+ AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
+
+ // Create an empty resource descriptor
+ unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
+
+ // Zero64 = 0
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64)
+ .addImm(0);
+
+ // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
+ .addImm(RsrcDataFormat & 0xFFFFFFFF);
+
+ // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
+ .addImm(RsrcDataFormat >> 32);
+
+ // NewSRsrc = {Zero64, SRsrcFormat}
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
+
+ return std::make_tuple(RsrcPtr, NewSRsrc);
+}
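The replacement descriptor built above is simply four dwords: a zeroed 64-bit base pointer followed by the low and high halves of the default data format. A minimal sketch of that packing, using a stand-in constant in place of getDefaultRsrcDataFormat():

#include <array>
#include <cassert>
#include <cstdint>

// Pack a 128-bit buffer resource descriptor as four dwords:
// {base_lo, base_hi, format_lo, format_hi} with a zero base pointer.
static std::array<uint32_t, 4> makeDummyRsrc(uint64_t RsrcDataFormat) {
  return {0u, 0u,
          static_cast<uint32_t>(RsrcDataFormat & 0xFFFFFFFF),
          static_cast<uint32_t>(RsrcDataFormat >> 32)};
}

int main() {
  // Stand-in format value, mirroring the RSRC_DATA_FORMAT constant declared
  // in SIInstrInfo.h; the real value comes from getDefaultRsrcDataFormat().
  const uint64_t Format = 0xf00000000000ULL;
  auto Rsrc = makeDummyRsrc(Format);
  assert(Rsrc[0] == 0 && Rsrc[1] == 0);       // zero base pointer
  assert(Rsrc[2] == 0 && Rsrc[3] == 0xf000u); // split format halves
  return 0;
}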
+
+void SIInstrInfo::legalizeOperands(MachineInstr &MI,
+ MachineDominatorTree *MDT) const {
MachineFunction &MF = *MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -3617,75 +4010,56 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
return;
}
- // Legalize MUBUF* instructions by converting to addr64 form.
- // FIXME: If we start using the non-addr64 instructions for compute, we
- // may need to legalize them as above. This especially applies to the
- // buffer_load_format_* variants and variants with idxen (or bothen).
- int SRsrcIdx =
+ // Legalize MUBUF* instructions.
+ int RsrcIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
- if (SRsrcIdx != -1) {
+ if (RsrcIdx != -1) {
// We have an MUBUF instruction
- MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx);
- unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass;
- if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
- RI.getRegClass(SRsrcRC))) {
+ MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
+ unsigned RsrcRC = get(MI.getOpcode()).OpInfo[RsrcIdx].RegClass;
+ if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()),
+ RI.getRegClass(RsrcRC))) {
// The operands are legal.
// FIXME: We may need to legalize operands besides srsrc.
return;
}
- MachineBasicBlock &MBB = *MI.getParent();
-
- // Extract the ptr from the resource descriptor.
- unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
+ // Legalize a VGPR Rsrc.
+ //
+ // If the instruction is _ADDR64, we can avoid a waterfall by extracting
+ // the base pointer from the VGPR Rsrc, adding it to the VAddr, then using
+ // a zero-value SRsrc.
+ //
+ // If the instruction is _OFFSET (both idxen and offen disabled), and we
+ // support ADDR64 instructions, we can convert to ADDR64 and do the same as
+ // above.
+ //
+ // Otherwise we are on non-ADDR64 hardware and/or the instruction uses
+ // idxen/offen/bothen, and we fall back to a waterfall loop.
- // Create an empty resource descriptor
- unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
- uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
-
- // Zero64 = 0
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64)
- .addImm(0);
-
- // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
- .addImm(RsrcDataFormat & 0xFFFFFFFF);
-
- // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
- .addImm(RsrcDataFormat >> 32);
-
- // NewSRsrc = {Zero64, SRsrcFormat}
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
- .addReg(Zero64)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SRsrcFormatLo)
- .addImm(AMDGPU::sub2)
- .addReg(SRsrcFormatHi)
- .addImm(AMDGPU::sub3);
+ MachineBasicBlock &MBB = *MI.getParent();
MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
- unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- if (VAddr) {
+ if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) {
// This is already an ADDR64 instruction so we need to add the pointer
// extracted from the resource descriptor to the current value of VAddr.
unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
+ unsigned RsrcPtr, NewSRsrc;
+ std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
+
+ // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
DebugLoc DL = MI.getDebugLoc();
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
- .addReg(SRsrcPtr, 0, AMDGPU::sub0)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
+ .addReg(RsrcPtr, 0, AMDGPU::sub0)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
- // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
+ // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
- .addReg(SRsrcPtr, 0, AMDGPU::sub1)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
+ .addReg(RsrcPtr, 0, AMDGPU::sub1)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
// NewVaddr = {NewVaddrHi, NewVaddrLo}
BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
@@ -3693,13 +4067,20 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
.addImm(AMDGPU::sub0)
.addReg(NewVAddrHi)
.addImm(AMDGPU::sub1);
- } else {
+
+ VAddr->setReg(NewVAddr);
+ Rsrc->setReg(NewSRsrc);
+ } else if (!VAddr && ST.hasAddr64()) {
// This instruction is the _OFFSET variant, so we need to convert it to
// ADDR64.
assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration()
< AMDGPUSubtarget::VOLCANIC_ISLANDS &&
"FIXME: Need to emit flat atomics here");
+ unsigned RsrcPtr, NewSRsrc;
+ std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
+
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
@@ -3715,10 +4096,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
MachineInstrBuilder MIB =
BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
.add(*VData)
- .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
- // This will be replaced later
- // with the new value of vaddr.
- .add(*SRsrc)
+ .addReg(NewVAddr)
+ .addReg(NewSRsrc)
.add(*SOffset)
.add(*Offset);
@@ -3735,21 +4114,19 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
MIB.addImm(TFE->getImm());
}
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
Addr64 = MIB;
} else {
// Atomics with return.
Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
.add(*VData)
.add(*VDataIn)
- .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
- // This will be replaced later
- // with the new value of vaddr.
- .add(*SRsrc)
+ .addReg(NewVAddr)
+ .addReg(NewSRsrc)
.add(*SOffset)
.add(*Offset)
.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .cloneMemRefs(MI);
}
MI.removeFromParent();
@@ -3757,23 +4134,20 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
// NewVaddr = {NewVaddrHi, NewVaddrLo}
BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
NewVAddr)
- .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+ .addReg(RsrcPtr, 0, AMDGPU::sub0)
.addImm(AMDGPU::sub0)
- .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+ .addReg(RsrcPtr, 0, AMDGPU::sub1)
.addImm(AMDGPU::sub1);
-
- VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr);
- SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc);
+ } else {
+ // This is another variant; legalize Rsrc with a waterfall loop that reads
+ // it from VGPRs into SGPRs.
+ loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT);
}
-
- // Update the instruction to use NewVaddr
- VAddr->setReg(NewVAddr);
- // Update the instruction to use NewSRsrc
- SRsrc->setReg(NewSRsrc);
}
}
-void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
+void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
+ MachineDominatorTree *MDT) const {
SetVectorType Worklist;
Worklist.insert(&TopInst);
@@ -3791,34 +4165,62 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
break;
case AMDGPU::S_ADD_U64_PSEUDO:
case AMDGPU::S_SUB_U64_PSEUDO:
- splitScalar64BitAddSub(Worklist, Inst);
+ splitScalar64BitAddSub(Worklist, Inst, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_ADD_I32:
case AMDGPU::S_SUB_I32:
// FIXME: The u32 versions currently selected use the carry.
- if (moveScalarAddSub(Worklist, Inst))
+ if (moveScalarAddSub(Worklist, Inst, MDT))
continue;
// Default handling
break;
case AMDGPU::S_AND_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_OR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_XOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_NAND_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_NOR_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_XNOR_B64:
+ if (ST.hasDLInsts())
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
+ else
+ splitScalar64BitXnor(Worklist, Inst, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_ANDN2_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_ORN2_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_NOT_B64:
- splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
Inst.eraseFromParent();
continue;
@@ -3899,90 +4301,31 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Inst.eraseFromParent();
continue;
- case AMDGPU::S_XNOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32);
+ case AMDGPU::S_NAND_B32:
+ splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
Inst.eraseFromParent();
continue;
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
- unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff);
- auto Add = MRI.getUniqueVRegDef(VAddr->getReg());
- unsigned Offset = 0;
-
- // FIXME: This isn't safe because the addressing mode doesn't work
- // correctly if vaddr is negative.
- //
- // FIXME: Should probably be done somewhere else, maybe SIFoldOperands.
- //
- // See if we can extract an immediate offset by recognizing one of these:
- // V_ADD_I32_e32 dst, imm, src1
- // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
- // V_ADD will be removed by "Remove dead machine instructions".
- if (Add &&
- (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
- Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
- static const unsigned SrcNames[2] = {
- AMDGPU::OpName::src0,
- AMDGPU::OpName::src1,
- };
-
- // Find a literal offset in one of source operands.
- for (int i = 0; i < 2; i++) {
- const MachineOperand *Src =
- getNamedOperand(*Add, SrcNames[i]);
-
- if (Src->isReg()) {
- auto Mov = MRI.getUniqueVRegDef(Src->getReg());
- if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
- Src = &Mov->getOperand(1);
- }
-
- if (Src) {
- if (Src->isImm())
- Offset = Src->getImm();
- else if (Src->isCImm())
- Offset = Src->getCImm()->getZExtValue();
- }
-
- if (Offset && isLegalMUBUFImmOffset(Offset)) {
- VAddr = getNamedOperand(*Add, SrcNames[!i]);
- break;
- }
-
- Offset = 0;
- }
- }
+ case AMDGPU::S_NOR_B32:
+ splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst.eraseFromParent();
+ continue;
- MachineInstr *NewInstr =
- BuildMI(*MBB, Inst, Inst.getDebugLoc(),
- get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
- .add(*VAddr) // vaddr
- .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
- .addImm(0) // soffset
- .addImm(Offset) // offset
- .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
- .addImm(0) // slc
- .addImm(0) // tfe
- .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end())
- .getInstr();
-
- MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
- VDst);
- addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
+ case AMDGPU::S_ANDN2_B32:
+ splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
Inst.eraseFromParent();
+ continue;
- // Legalize all operands other than the offset. Notably, convert the srsrc
- // into SGPRs using v_readfirstlane if needed.
- legalizeOperands(*NewInstr);
+ case AMDGPU::S_ORN2_B32:
+ splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst.eraseFromParent();
continue;
}
- }
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
// legalize its operands instead.
- legalizeOperands(Inst);
+ legalizeOperands(Inst, MDT);
continue;
}
@@ -4071,7 +4414,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
// Legalize the operands
- legalizeOperands(Inst);
+ legalizeOperands(Inst, MDT);
if (HasDst)
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
@@ -4079,8 +4422,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
// Add/sub require special handling to deal with carry outs.
-bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist,
- MachineInstr &Inst) const {
+bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
if (ST.hasAddNoCarry()) {
// Assume there is no user of scc since we don't select this in that case.
// Since scc isn't used, it doesn't really matter if the i32 or u32 variant
@@ -4104,7 +4447,7 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist,
Inst.setDesc(get(NewOpc));
Inst.addImplicitDefUseOperands(*MBB.getParent());
MRI.replaceRegWith(OldDstReg, ResultReg);
- legalizeOperands(Inst);
+ legalizeOperands(Inst, MDT);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
return true;
@@ -4151,23 +4494,116 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
MachineOperand &Src0 = Inst.getOperand(1);
MachineOperand &Src1 = Inst.getOperand(2);
- legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
- legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
-
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
if (ST.hasDLInsts()) {
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
+ legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
+
BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
.add(Src0)
.add(Src1);
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
+ addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
} else {
- unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor)
- .add(Src0)
+ // Using the identity ~(x ^ y) == (~x ^ y) == (x ^ ~y), we can
+ // invert either source and then perform the XOR. If either source is a
+ // scalar register, then we can leave the inversion on the scalar unit to
+ // achieve a better distribution of scalar and vector instructions.
+ bool Src0IsSGPR = Src0.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
+ bool Src1IsSGPR = Src1.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
+ MachineInstr *Not = nullptr;
+ MachineInstr *Xor = nullptr;
+ unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ // Build a pair of scalar instructions and add them to the work list.
+ // The next iteration over the work list will lower these to the vector
+ // unit as necessary.
+ if (Src0IsSGPR) {
+ Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
+ .add(Src0);
+ Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
+ .addReg(Temp)
.add(Src1);
+ } else if (Src1IsSGPR) {
+ Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
+ .add(Src1);
+ Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
+ .add(Src0)
+ .addReg(Temp);
+ } else {
+ Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
+ .add(Src0)
+ .add(Src1);
+ Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
+ .addReg(Temp);
+ Worklist.insert(Not);
+ }
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
- BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), NewDest)
- .addReg(Xor);
+ Worklist.insert(Xor);
+
+ addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
}
+}
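The fallback path depends on the identity ~(x ^ y) == (~x) ^ y == x ^ (~y), which is what allows the NOT to stay on whichever operand is already scalar. A few lines checking it exhaustively over 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned Y = 0; Y < 256; ++Y) {
      uint8_t A = static_cast<uint8_t>(~(X ^ Y)); // xnor
      uint8_t B = static_cast<uint8_t>(~X ^ Y);   // NOT folded into src0
      uint8_t C = static_cast<uint8_t>(X ^ ~Y);   // NOT folded into src1
      assert(A == B && B == C);
    }
  }
  return 0;
}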
+
+void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
+ .add(Src0)
+ .add(Src1);
+
+ MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
+ .addReg(Interm);
+
+ Worklist.insert(&Op);
+ Worklist.insert(&Not);
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
+ addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
+}
+
+void SIInstrInfo::splitScalarBinOpN2(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
+ .add(Src1);
+
+ MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest)
+ .add(Src0)
+ .addReg(Interm);
+
+ Worklist.insert(&Not);
+ Worklist.insert(&Op);
MRI.replaceRegWith(Dest.getReg(), NewDest);
addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
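Both helpers above are two-instruction decompositions: splitScalarNotBinop inverts the result of the base op (NAND/NOR), while splitScalarBinOpN2 inverts only the second source (ANDN2/ORN2). The placement of the NOT matters, as this small check illustrates:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0b1100u, Y = 0b1010u;
  // NAND inverts the result of the AND; inverting both inputs instead would
  // give a different value (De Morgan turns it into an OR).
  assert(~(X & Y) != (~X & ~Y));
  // ANDN2 inverts only the second source, so operand order matters.
  assert((X & ~Y) == 0b0100u);
  assert((X & ~Y) != (~X & Y));
  return 0;
}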
@@ -4200,13 +4636,13 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
- BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
+ MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
- BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
+ MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
@@ -4217,6 +4653,9 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+ Worklist.insert(&LoHalf);
+ Worklist.insert(&HiHalf);
+
// We don't need to legalizeOperands here because for a single operand, src0
// will support any kind of input.
@@ -4224,8 +4663,9 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitAddSub(
- SetVectorType &Worklist, MachineInstr &Inst) const {
+void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
MachineBasicBlock &MBB = *Inst.getParent();
@@ -4285,16 +4725,16 @@ void SIInstrInfo::splitScalar64BitAddSub(
// Try to legalize the operands in case we need to swap the order to keep it
// valid.
- legalizeOperands(*LoHalf);
- legalizeOperands(*HiHalf);
+ legalizeOperands(*LoHalf, MDT);
+ legalizeOperands(*HiHalf, MDT);
// Move all users of this moved value.
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitBinaryOp(
- SetVectorType &Worklist, MachineInstr &Inst,
- unsigned Opcode) const {
+void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
+ MachineInstr &Inst, unsigned Opcode,
+ MachineDominatorTree *MDT) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -4321,6 +4761,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
AMDGPU::sub0, Src0SubRC);
MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
AMDGPU::sub0, Src1SubRC);
+ MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub1, Src0SubRC);
+ MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
+ AMDGPU::sub1, Src1SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
@@ -4331,11 +4775,6 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
.add(SrcReg0Sub0)
.add(SrcReg1Sub0);
- MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub1, Src0SubRC);
- MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub1, Src1SubRC);
-
unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.add(SrcReg0Sub1)
@@ -4350,22 +4789,62 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
- // Try to legalize the operands in case we need to swap the order to keep it
- // valid.
- legalizeOperands(LoHalf);
- legalizeOperands(HiHalf);
+ Worklist.insert(&LoHalf);
+ Worklist.insert(&HiHalf);
// Move all users of this moved value.
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
+void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineBasicBlock::iterator MII = Inst;
+
+ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+
+ unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ MachineOperand *Op0;
+ MachineOperand *Op1;
+
+ if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) {
+ Op0 = &Src0;
+ Op1 = &Src1;
+ } else {
+ Op0 = &Src1;
+ Op1 = &Src0;
+ }
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
+ .add(*Op0);
+
+ unsigned NewDest = MRI.createVirtualRegister(DestRC);
+
+ MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
+ .addReg(Interm)
+ .add(*Op1);
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
+
+ Worklist.insert(&Xor);
+}
+
void SIInstrInfo::splitScalar64BitBCNT(
SetVectorType &Worklist, MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineBasicBlock::iterator MII = Inst;
- DebugLoc DL = Inst.getDebugLoc();
+ const DebugLoc &DL = Inst.getDebugLoc();
MachineOperand &Dest = Inst.getOperand(0);
MachineOperand &Src = Inst.getOperand(1);
@@ -4401,7 +4880,7 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineBasicBlock::iterator MII = Inst;
- DebugLoc DL = Inst.getDebugLoc();
+ const DebugLoc &DL = Inst.getDebugLoc();
MachineOperand &Dest = Inst.getOperand(0);
uint32_t Imm = Inst.getOperand(2).getImm();
@@ -4546,10 +5025,10 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(
make_range(MachineBasicBlock::iterator(SCCDefInst),
SCCDefInst.getParent()->end())) {
// Exit if we find another SCC def.
- if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
+ if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
return;
- if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
+ if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
Worklist.insert(&MI);
}
}
@@ -4716,7 +5195,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
return AMDGPU::NoRegister;
assert(!MI.memoperands_empty() &&
- (*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS);
+ (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
FrameIndex = Addr->getIndex();
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
@@ -4777,12 +5256,6 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// If we have a definitive size, we can use it. Otherwise we need to inspect
// the operands to know the size.
- //
- // FIXME: Instructions that have a base 32-bit encoding report their size as
- // 4, even though they are really 8 bytes if they have a literal operand.
- if (DescSize != 0 && DescSize != 4)
- return DescSize;
-
if (isFixedSize(MI))
return DescSize;
@@ -4791,23 +5264,27 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (isVALU(MI) || isSALU(MI)) {
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
- return 4; // No operands.
+ return DescSize; // No operands.
if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
- return 8;
+ return DescSize + 4;
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
- return 4;
+ return DescSize;
if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
- return 8;
+ return DescSize + 4;
- return 4;
- }
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ if (Src2Idx == -1)
+ return DescSize;
+
+ if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
+ return DescSize + 4;
- if (DescSize == 4)
- return 4;
+ return DescSize;
+ }
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
@@ -4823,7 +5300,7 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
default:
- llvm_unreachable("unable to find instruction size");
+ return DescSize;
}
}
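With the early-exit on DescSize removed, the rule above is uniform: the descriptor size covers the base encoding, and a literal-constant source appends one extra 32-bit dword. A minimal sketch of that accounting (values chosen only to match what the function above would return):

#include <cassert>

// Mirror of the size rule above: base encoding size plus one dword when any
// source operand is a literal constant.
static unsigned instSizeBytes(unsigned DescSize, bool HasLiteralSrc) {
  return HasLiteralSrc ? DescSize + 4 : DescSize;
}

int main() {
  assert(instSizeBytes(4, false) == 4); // 32-bit encoding, inline operands only
  assert(instSizeBytes(4, true) == 8);  // 32-bit encoding plus a literal dword
  assert(instSizeBytes(8, true) == 12); // 64-bit encoding plus a literal dword
  return 0;
}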
@@ -4835,7 +5312,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
return true;
for (const MachineMemOperand *MMO : MI.memoperands()) {
- if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS)
+ if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
return true;
}
return false;
@@ -5069,3 +5546,84 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
return MCOp;
}
+
+static
+TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) {
+ assert(RegOpnd.isReg());
+ return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() :
+ getRegSubRegPair(RegOpnd);
+}
+
+TargetInstrInfo::RegSubRegPair
+llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) {
+ assert(MI.isRegSequence());
+ for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
+ if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
+ auto &RegOp = MI.getOperand(1 + 2 * I);
+ return getRegOrUndef(RegOp);
+ }
+ return TargetInstrInfo::RegSubRegPair();
+}
+
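getRegSequenceSubReg leans on REG_SEQUENCE's operand layout: after the single def, the uses come in (register, subregister-index) pairs, which is where the (NumOperands - 1) / 2 pair count comes from. A sketch of the same lookup over a plain pair list (register numbers and subreg indices are invented):

#include <cassert>
#include <utility>
#include <vector>

// Operands of a REG_SEQUENCE after the def: (source register, subreg index).
using RegSubRegPair = std::pair<unsigned, unsigned>;

static unsigned findSourceForSubReg(const std::vector<RegSubRegPair> &Pairs,
                                    unsigned SubReg) {
  for (const auto &P : Pairs)
    if (P.second == SubReg)
      return P.first; // the register that supplies this subregister
  return 0;           // not found; the helper above returns an empty pair
}

int main() {
  // %dst = REG_SEQUENCE %100, sub0, %101, sub1   (sub0 = 1, sub1 = 2 here)
  std::vector<RegSubRegPair> Pairs = {{100u, 1u}, {101u, 2u}};
  assert(findSourceForSubReg(Pairs, 2) == 101);
  assert(findSourceForSubReg(Pairs, 3) == 0);
  return 0;
}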
+// Try to find the definition of reg:subreg in subreg-manipulation pseudos.
+// Following a subreg of reg:subreg isn't supported.
+static bool followSubRegDef(MachineInstr &MI,
+ TargetInstrInfo::RegSubRegPair &RSR) {
+ if (!RSR.SubReg)
+ return false;
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::REG_SEQUENCE:
+ RSR = getRegSequenceSubReg(MI, RSR.SubReg);
+ return true;
+ // EXTRACT_SUBREG isn't supported as this would follow a subreg of a subreg
+ case AMDGPU::INSERT_SUBREG:
+ if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
+ // inserted the subreg we're looking for
+ RSR = getRegOrUndef(MI.getOperand(2));
+ else { // the subreg in the rest of the reg
+ auto R1 = getRegOrUndef(MI.getOperand(1));
+ if (R1.SubReg) // subreg of subreg isn't supported
+ return false;
+ RSR.Reg = R1.Reg;
+ }
+ return true;
+ }
+ return false;
+}
+
+MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
+ MachineRegisterInfo &MRI) {
+ assert(MRI.isSSA());
+ if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
+ return nullptr;
+
+ auto RSR = P;
+ auto *DefInst = MRI.getVRegDef(RSR.Reg);
+ while (auto *MI = DefInst) {
+ DefInst = nullptr;
+ switch (MI->getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::V_MOV_B32_e32: {
+ auto &Op1 = MI->getOperand(1);
+ if (Op1.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
+ if (Op1.isUndef())
+ return nullptr;
+ RSR = getRegSubRegPair(Op1);
+ DefInst = MRI.getVRegDef(RSR.Reg);
+ }
+ break;
+ }
+ default:
+ if (followSubRegDef(*MI, RSR)) {
+ if (!RSR.Reg)
+ return nullptr;
+ DefInst = MRI.getVRegDef(RSR.Reg);
+ }
+ }
+ if (!DefInst)
+ return MI;
+ }
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index d681b926504e..5b1a05f3785e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -37,6 +37,7 @@
namespace llvm {
class APInt;
+class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class GCNSubtarget;
@@ -79,8 +80,8 @@ public:
private:
void swapOperands(MachineInstr &Inst) const;
- bool moveScalarAddSub(SetVectorType &Worklist,
- MachineInstr &Inst) const;
+ bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
void lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const;
@@ -88,14 +89,26 @@ private:
void lowerScalarXnor(SetVectorType &Worklist,
MachineInstr &Inst) const;
+ void splitScalarNotBinop(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const;
+
+ void splitScalarBinOpN2(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const;
+
void splitScalar64BitUnaryOp(SetVectorType &Worklist,
MachineInstr &Inst, unsigned Opcode) const;
- void splitScalar64BitAddSub(SetVectorType &Worklist,
- MachineInstr &Inst) const;
+ void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
+
+ void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
+ unsigned Opcode,
+ MachineDominatorTree *MDT = nullptr) const;
- void splitScalar64BitBinaryOp(SetVectorType &Worklist,
- MachineInstr &Inst, unsigned Opcode) const;
+ void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
void splitScalar64BitBCNT(SetVectorType &Worklist,
MachineInstr &Inst) const;
@@ -160,12 +173,11 @@ public:
int64_t &Offset1,
int64_t &Offset2) const override;
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const final;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const final;
- bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
- MachineInstr &SecondLdSt, unsigned BaseReg2,
+ bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
unsigned NumLoads) const override;
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
@@ -225,6 +237,9 @@ public:
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
+ bool findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0,
+ unsigned &SrcOpIdx1) const;
+
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;
@@ -276,7 +291,7 @@ public:
unsigned TrueReg, unsigned FalseReg) const;
unsigned getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const override;
+ unsigned Kind) const override;
bool
areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
@@ -589,6 +604,14 @@ public:
return MI.getDesc().TSFlags & ClampFlags;
}
+ static bool usesFPDPRounding(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
+ }
+
+ bool usesFPDPRounding(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
@@ -689,6 +712,12 @@ public:
unsigned OpName) const;
bool hasAnyModifiersSet(const MachineInstr &MI) const;
+ bool canShrink(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const;
+
+ MachineInstr *buildShrunkInst(MachineInstr &MI,
+ unsigned NewOpcode) const;
+
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
@@ -719,6 +748,16 @@ public:
/// This form should usually be preferred since it handles operands
/// with unknown register classes.
unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) {
+ if (unsigned SubReg = MO.getSubReg()) {
+ assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
+ MI.getParent()->getParent()->getRegInfo().
+ getRegClass(MO.getReg()), SubReg)) >= 32 &&
+ "Sub-dword subregs are not supported");
+ return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4;
+ }
+ }
return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
}
@@ -777,14 +816,16 @@ public:
MachineOperand &Op, MachineRegisterInfo &MRI,
const DebugLoc &DL) const;
- /// Legalize all operands in this instruction. This function may
- /// create new instruction and insert them before \p MI.
- void legalizeOperands(MachineInstr &MI) const;
+ /// Legalize all operands in this instruction. This function may create new
+ /// instructions and control-flow around \p MI. If present, \p MDT is
+ /// updated.
+ void legalizeOperands(MachineInstr &MI,
+ MachineDominatorTree *MDT = nullptr) const;
/// Replace this instruction's opcode with the equivalent VALU
/// opcode. This function will also move the users of \p MI to the
- /// VALU if necessary.
- void moveToVALU(MachineInstr &MI) const;
+ /// VALU if necessary. If present, \p MDT is updated.
+ void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
int Count) const;
@@ -885,9 +926,36 @@ public:
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
-
};
+/// \brief Returns true if the reg:subreg pair \p P is in register class \p TRC
+inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
+ const TargetRegisterClass &TRC,
+ MachineRegisterInfo &MRI) {
+ auto *RC = MRI.getRegClass(P.Reg);
+ if (!P.SubReg)
+ return RC == &TRC;
+ auto *TRI = MRI.getTargetRegisterInfo();
+ return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
+}
+
+/// \brief Create RegSubRegPair from a register MachineOperand
+inline
+TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
+ assert(O.isReg());
+ return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
+}
+
+/// \brief Return the source reg:subreg pair that a REG_SEQUENCE assigns to \p SubReg
+TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
+ unsigned SubReg);
+
+/// \brief Return the defining instruction for a given reg:subreg pair
+/// skipping copy like instructions and subreg-manipulation pseudos.
+/// Following another subreg of a reg:subreg isn't supported.
+MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
+ MachineRegisterInfo &MRI);
+
namespace AMDGPU {
LLVM_READONLY
@@ -900,6 +968,9 @@ namespace AMDGPU {
int getSDWAOp(uint16_t Opcode);
LLVM_READONLY
+ int getDPPOp32(uint16_t Opcode);
+
+ LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);
LLVM_READONLY
@@ -911,6 +982,12 @@ namespace AMDGPU {
LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);
+ /// Check if \p Opcode is an Addr64 opcode.
+ ///
+ /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
+ LLVM_READONLY
+ int getIfAddr64Inst(uint16_t Opcode);
+
LLVM_READONLY
int getMUBUFNoLdsInst(uint16_t Opcode);
@@ -923,6 +1000,9 @@ namespace AMDGPU {
LLVM_READONLY
int getSOPKOp(uint16_t Opcode);
+ LLVM_READONLY
+ int getGlobalSaddrOp(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 8fa37aa83dae..13afa4d4974b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -40,9 +40,9 @@ def SIEncodingFamily {
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
-def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
- SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
- [SDNPMayLoad, SDNPMemOperand]
+def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
+ SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>]>,
+ [SDNPMayLoad, SDNPMemOperand]
>;
def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
@@ -69,36 +69,34 @@ def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
-def SDTbuffer_load : SDTypeProfile<1, 9,
+def SDTtbuffer_load : SDTypeProfile<1, 8,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
SDTCisVT<2, i32>, // vindex(VGPR)
SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // dfmt(imm)
- SDTCisVT<7, i32>, // nfmt(imm)
- SDTCisVT<8, i32>, // glc(imm)
- SDTCisVT<9, i32> // slc(imm)
+ SDTCisVT<6, i32>, // format(imm)
+ SDTCisVT<7, i32>, // cachecontrol(imm)
+ SDTCisVT<8, i1> // idxen(imm)
]>;
-def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTbuffer_load,
+def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
- SDTbuffer_load,
+ SDTtbuffer_load,
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
-def SDTtbuffer_store : SDTypeProfile<0, 10,
+def SDTtbuffer_store : SDTypeProfile<0, 9,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
SDTCisVT<2, i32>, // vindex(VGPR)
SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // dfmt(imm)
- SDTCisVT<7, i32>, // nfmt(imm)
- SDTCisVT<8, i32>, // glc(imm)
- SDTCisVT<9, i32> // slc(imm)
+ SDTCisVT<6, i32>, // format(imm)
+ SDTCisVT<7, i32>, // cachecontrol(imm)
+ SDTCisVT<8, i1> // idxen(imm)
]>;
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
@@ -110,13 +108,15 @@ def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
SDTtbuffer_store,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
-def SDTBufferLoad : SDTypeProfile<1, 5,
+def SDTBufferLoad : SDTypeProfile<1, 7,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
- SDTCisVT<2, i32>, // vindex
- SDTCisVT<3, i32>, // offset
- SDTCisVT<4, i1>, // glc
- SDTCisVT<5, i1>]>; // slc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // offset(imm)
+ SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<7, i1>]>; // idxen(imm)
def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
@@ -126,13 +126,15 @@ def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
-def SDTBufferStore : SDTypeProfile<0, 6,
+def SDTBufferStore : SDTypeProfile<0, 8,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
- SDTCisVT<2, i32>, // vindex
- SDTCisVT<3, i32>, // offset
- SDTCisVT<4, i1>, // glc
- SDTCisVT<5, i1>]>; // slc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // offset(imm)
+ SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<7, i1>]>; // idxen(imm)
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
@@ -144,13 +146,16 @@ def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
class SDBufferAtomic<string opcode> : SDNode <opcode,
- SDTypeProfile<1, 5,
+ SDTypeProfile<1, 8,
[SDTCisVT<0, i32>, // dst
SDTCisVT<1, i32>, // vdata
SDTCisVT<2, v4i32>, // rsrc
- SDTCisVT<3, i32>, // vindex
- SDTCisVT<4, i32>, // offset
- SDTCisVT<5, i1>]>, // slc
+ SDTCisVT<3, i32>, // vindex(VGPR)
+ SDTCisVT<4, i32>, // voffset(VGPR)
+ SDTCisVT<5, i32>, // soffset(SGPR)
+ SDTCisVT<6, i32>, // offset(imm)
+ SDTCisVT<7, i32>, // cachepolicy(imm)
+ SDTCisVT<8, i1>]>, // idxen(imm)
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
@@ -166,14 +171,17 @@ def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
- SDTypeProfile<1, 6,
+ SDTypeProfile<1, 9,
[SDTCisVT<0, i32>, // dst
SDTCisVT<1, i32>, // src
SDTCisVT<2, i32>, // cmp
SDTCisVT<3, v4i32>, // rsrc
- SDTCisVT<4, i32>, // vindex
- SDTCisVT<5, i32>, // offset
- SDTCisVT<6, i1>]>, // slc
+ SDTCisVT<4, i32>, // vindex(VGPR)
+ SDTCisVT<5, i32>, // voffset(VGPR)
+ SDTCisVT<6, i32>, // soffset(SGPR)
+ SDTCisVT<7, i32>, // offset(imm)
+ SDTCisVT<8, i32>, // cachepolicy(imm)
+ SDTCisVT<9, i1>]>, // idxen(imm)
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
@@ -487,24 +495,7 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
}]>;
class VGPRImm <dag frag> : PatLeaf<frag, [{
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return false;
- }
- const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
- unsigned Limit = 0;
- for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
- Limit < 10 && U != E; ++U, ++Limit) {
- const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
-
- // If the register class is unknown, it could be an unknown
- // register class that needs to be an SGPR, e.g. an inline asm
- // constraint
- if (!RC || SIRI->isSGPRClass(RC))
- return false;
- }
-
- return Limit < 10;
+ return isVGPRImm(N);
}]>;
def NegateImm : SDNodeXForm<imm, [{
@@ -746,14 +737,13 @@ def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
-def R128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
+def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
-def DFMT : NamedOperandU8<"DFMT", NamedMatchClass<"DFMT">>;
-def NFMT : NamedOperandU8<"NFMT", NamedMatchClass<"NFMT">>;
+def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;
def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
@@ -1632,7 +1622,7 @@ class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
0, // 64-bit dst - No DPP or SDWA for 64-bit operands
!if(!eq(Src0VT.Size, 64),
0, // 64-bit src0
- !if(!eq(Src0VT.Size, 64),
+ !if(!eq(Src1VT.Size, 64),
0, // 64-bit src2
1
)
@@ -1641,6 +1631,12 @@ class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
);
}
+class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
+ ValueType Src1VT = i32> {
+ bit ret = !if(!eq(NumSrcArgs, 0), 0,
+ getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
+}
+
class BitOr<bit a, bit b> {
bit ret = !if(a, 1, !if(b, 1, 0));
}
@@ -1649,6 +1645,11 @@ class BitAnd<bit a, bit b> {
bit ret = !if(a, !if(b, 1, 0), 0);
}
+def PatGenMode {
+ int NoPattern = 0;
+ int Pattern = 1;
+}
+
class VOPProfile <list<ValueType> _ArgVT> {
field list<ValueType> ArgVT = _ArgVT;
@@ -1715,7 +1716,10 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSDWAOMod = isFloatType<DstVT>.ret;
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
- field bit HasSDWA9 = HasExt;
+ field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+ field bit HasExtSDWA = HasExt;
+ field bit HasExtSDWA9 = HasExt;
+ field int NeedPatGen = PatGenMode.NoPattern;
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
@@ -1743,8 +1747,10 @@ class VOPProfile <list<ValueType> _ArgVT> {
getOpSelMod<Src0VT>.ret,
getOpSelMod<Src1VT>.ret,
getOpSelMod<Src2VT>.ret>.ret;
- field dag InsDPP = getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
- HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
+ field dag InsDPP = !if(HasExtDPP,
+ getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
+ HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
+ (ins));
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
@@ -1758,14 +1764,21 @@ class VOPProfile <list<ValueType> _ArgVT> {
HasSrc0FloatMods,
HasSrc1FloatMods,
HasSrc2FloatMods>.ret;
- field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string AsmDPP = !if(HasExtDPP,
+ getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
}
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
+}
+
+class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
+ let NeedPatGen = mode;
}
def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
@@ -1788,6 +1801,8 @@ def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
+def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
+def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
@@ -1925,6 +1940,15 @@ def getBasicFromSDWAOp : InstrMapping {
let ValueCols = [["Default"]];
}
+// Maps ordinary instructions to their DPP counterparts
+def getDPPOp32 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["AsmVariantName"];
+ let KeyCol = ["Default"];
+ let ValueCols = [["DPP"]];
+}
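The InstrMapping above is consumed from C++ through a TableGen-generated lookup; a minimal sketch of the usual query pattern, assuming the generated accessor is named AMDGPU::getDPPOp32 and follows the same -1 sentinel convention the getGlobalSaddrOp lookup relies on later in this change (selectDPPVariant is purely illustrative and presumes the generated instruction tables are visible, as they are in SIInstrInfo.cpp):
// Sketch only: pick the DPP counterpart of an opcode when one exists.
static unsigned selectDPPVariant(unsigned Opc) {
  // Generated from the getDPPOp32 InstrMapping; returns -1 if Opc has no
  // "DPP" asm-variant row.
  int DPPOpc = AMDGPU::getDPPOp32(Opc);
  return DPPOpc == -1 ? Opc : static_cast<unsigned>(DPPOpc);
}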
+
// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
let FilterClass = "Commutable_REV";
@@ -1977,6 +2001,14 @@ def getAddr64Inst : InstrMapping {
let ValueCols = [["1"]];
}
+def getIfAddr64Inst : InstrMapping {
+ let FilterClass = "MUBUFAddr64Table";
+ let RowFields = ["OpName"];
+ let ColFields = ["IsAddr64"];
+ let KeyCol = ["1"];
+ let ValueCols = [["1"]];
+}
+
def getMUBUFNoLdsInst : InstrMapping {
let FilterClass = "MUBUFLdsTable";
let RowFields = ["OpName"];
@@ -2003,6 +2035,15 @@ def getAtomicNoRetOp : InstrMapping {
let ValueCols = [["0"]];
}
+// Maps a GLOBAL to its SADDR form.
+def getGlobalSaddrOp : InstrMapping {
+ let FilterClass = "GlobalSaddrTable";
+ let RowFields = ["SaddrOp"];
+ let ColFields = ["IsSaddr"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
include "SIInstructions.td"
include "DSInstructions.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index 5c10646161b3..b6b00c2e4257 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -15,8 +15,8 @@ class GCNPat<dag pattern, dag result> : Pat<pattern, result>, GCNPredicateContro
let SubtargetPredicate = isGCN;
}
-include "VOPInstructions.td"
include "SOPInstructions.td"
+include "VOPInstructions.td"
include "SMInstructions.td"
include "FLATInstructions.td"
include "BUFInstructions.td"
@@ -164,29 +164,26 @@ def S_SUB_U64_CO_PSEUDO : SPseudoInstSI <
} // End usesCustomInserter = 1, Defs = [SCC]
-let usesCustomInserter = 1, SALU = 1 in {
-def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
+let usesCustomInserter = 1 in {
+def GET_GROUPSTATICSIZE : SPseudoInstSI <(outs SReg_32:$sdst), (ins),
[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
} // End let usesCustomInserter = 1, SALU = 1
-def S_MOV_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+def S_MOV_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
(ins SSrc_b64:$src0)> {
- let SALU = 1;
let isAsCheapAsAMove = 1;
let isTerminator = 1;
}
-def S_XOR_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+def S_XOR_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
(ins SSrc_b64:$src0, SSrc_b64:$src1)> {
- let SALU = 1;
let isAsCheapAsAMove = 1;
let isTerminator = 1;
let Defs = [SCC];
}
-def S_ANDN2_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+def S_ANDN2_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
(ins SSrc_b64:$src0, SSrc_b64:$src1)> {
- let SALU = 1;
let isAsCheapAsAMove = 1;
let isTerminator = 1;
}
@@ -250,7 +247,7 @@ def SI_LOOP : CFPseudoInstSI <
(outs), (ins SReg_64:$saved, brtarget:$target),
[(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
let Size = 8;
- let isBranch = 0;
+ let isBranch = 1;
let hasSideEffects = 1;
}
@@ -267,14 +264,6 @@ def SI_END_CF : CFPseudoInstSI <
let mayStore = 1;
}
-def SI_BREAK : CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$src),
- [(set i64:$dst, (int_amdgcn_break i64:$src))], 1> {
- let Size = 4;
- let isAsCheapAsAMove = 1;
- let isReMaterializable = 1;
-}
-
def SI_IF_BREAK : CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
[(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
@@ -283,14 +272,6 @@ def SI_IF_BREAK : CFPseudoInstSI <
let isReMaterializable = 1;
}
-def SI_ELSE_BREAK : CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1),
- [(set i64:$dst, (int_amdgcn_else_break i64:$src0, i64:$src1))]> {
- let Size = 4;
- let isAsCheapAsAMove = 1;
- let isReMaterializable = 1;
-}
-
let Uses = [EXEC] in {
multiclass PseudoInstKill <dag ins> {
@@ -326,6 +307,7 @@ def SI_ILLEGAL_COPY : SPseudoInstSI <
def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
let isTerminator = 1;
let usesCustomInserter = 1;
+ let isBranch = 1;
}
def SI_PS_LIVE : PseudoInstSI <
@@ -598,7 +580,13 @@ def : Pat <
(int_amdgcn_kill (i1 (setcc f32:$src, InlineFPImm<f32>:$imm, cond:$cond))),
(SI_KILL_F32_COND_IMM_PSEUDO $src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond))
>;
-// TODO: we could add more variants for other types of conditionals
+
+ // TODO: we could add more variants for other types of conditionals
+
+def : Pat <
+ (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+ (COPY $src) // Return the SGPRs representing i1 src
+>;
//===----------------------------------------------------------------------===//
// VOP1 Patterns
@@ -730,12 +718,14 @@ defm : SelectPat <i32, V_CNDMASK_B32_e64>;
defm : SelectPat <f16, V_CNDMASK_B32_e64>;
defm : SelectPat <f32, V_CNDMASK_B32_e64>;
+let AddedComplexity = 1 in {
def : GCNPat <
- (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
+ (i32 (add (i32 (getDivergentFrag<ctpop>.ret i32:$popcnt)), i32:$val)),
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
+}
def : GCNPat <
- (i16 (add (i16 (trunc (ctpop i32:$popcnt))), i16:$val)),
+ (i16 (add (i16 (trunc (getDivergentFrag<ctpop>.ret i32:$popcnt))), i16:$val)),
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
@@ -867,6 +857,8 @@ def : BitConvert <f64, v2f32, VReg_64>;
def : BitConvert <v2f32, f64, VReg_64>;
def : BitConvert <f64, v2i32, VReg_64>;
def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <v4i16, v4f16, VReg_64>;
+def : BitConvert <v4f16, v4i16, VReg_64>;
// FIXME: Make SGPR
def : BitConvert <v2i32, v4f16, VReg_64>;
@@ -1324,6 +1316,38 @@ def : GCNPat <
>;
def : GCNPat <
+ (i1 (add i1:$src0, i1:$src1)),
+ (S_XOR_B64 $src0, $src1)
+>;
+
+def : GCNPat <
+ (i1 (sub i1:$src0, i1:$src1)),
+ (S_XOR_B64 $src0, $src1)
+>;
+
+let AddedComplexity = 1 in {
+def : GCNPat <
+ (i1 (add i1:$src0, (i1 -1))),
+ (S_NOT_B64 $src0)
+>;
+
+def : GCNPat <
+ (i1 (sub i1:$src0, (i1 -1))),
+ (S_NOT_B64 $src0)
+>;
+}
+
+def : GCNPat <
+ (f16 (sint_to_fp i1:$src)),
+ (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src))
+>;
+
+def : GCNPat <
+ (f16 (uint_to_fp i1:$src)),
+ (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src))
+>;
+
+def : GCNPat <
(f32 (sint_to_fp i1:$src)),
(V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
>;
@@ -1464,13 +1488,32 @@ class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPa
def : ExpPattern<AMDGPUexport, i32, EXP>;
def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
-// COPY_TO_REGCLASS is workaround tablegen bug from multiple outputs
+// COPY is a workaround for a tablegen bug with multiple outputs, which arise
// from S_LSHL_B32's implicit scc def.
def : GCNPat <
(v2i16 (build_vector (i16 0), i16:$src1)),
- (v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0))
+ (v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
+>;
+
+def : GCNPat <
+ (v2i16 (build_vector i16:$src0, (i16 undef))),
+ (v2i16 (COPY $src0))
+>;
+
+def : GCNPat <
+ (v2f16 (build_vector f16:$src0, (f16 undef))),
+ (v2f16 (COPY $src0))
+>;
+
+def : GCNPat <
+ (v2i16 (build_vector (i16 undef), i16:$src1)),
+ (v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
>;
+def : GCNPat <
+ (v2f16 (build_vector (f16 undef), f16:$src1)),
+ (v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
+>;
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
@@ -1501,15 +1544,15 @@ def : GCNPat <
} // End SubtargetPredicate = HasVOP3PInsts
-// def : GCNPat <
-// (v2f16 (scalar_to_vector f16:$src0)),
-// (COPY $src0)
-// >;
+def : GCNPat <
+ (v2f16 (scalar_to_vector f16:$src0)),
+ (COPY $src0)
+>;
-// def : GCNPat <
-// (v2i16 (scalar_to_vector i16:$src0)),
-// (COPY $src0)
-// >;
+def : GCNPat <
+ (v2i16 (scalar_to_vector i16:$src0)),
+ (COPY $src0)
+>;
def : GCNPat <
(v4i16 (scalar_to_vector i16:$src0)),
@@ -1587,18 +1630,19 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;
-def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
-def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;
+defm : IntMed3Pat<V_MED3_I32, smin, smax, smin_oneuse, smax_oneuse>;
+defm : IntMed3Pat<V_MED3_U32, umin, umax, umin_oneuse, umax_oneuse>;
}
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class FPMed3Pat<ValueType vt,
+ //SDPatternOperator max, SDPatternOperator min,
Instruction med3Inst> : GCNPat<
- (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (fmaxnum_like (fminnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
- (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (fminnum_like_oneuse (fmaxnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
(vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
@@ -1606,28 +1650,41 @@ class FPMed3Pat<ValueType vt,
class FP16Med3Pat<ValueType vt,
Instruction med3Inst> : GCNPat<
- (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
- (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
- (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
- (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (fmaxnum_like (fminnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (fminnum_like_oneuse (fmaxnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
(vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
>;
-class Int16Med3Pat<Instruction med3Inst,
+multiclass Int16Med3Pat<Instruction med3Inst,
+ SDPatternOperator min,
SDPatternOperator max,
SDPatternOperator max_oneuse,
SDPatternOperator min_oneuse,
- ValueType vt = i32> : GCNPat<
+ ValueType vt = i16> {
+ // This matches 16 permutations of
+ // max(min(x, y), min(max(x, y), z))
+ def : GCNPat <
(max (min_oneuse vt:$src0, vt:$src1),
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
>;
+ // This matches 16 permutations of
+ // min(max(a, b), max(min(a, b), c))
+ def : GCNPat <
+ (min (max_oneuse vt:$src0, vt:$src1),
+ (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
+ (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+>;
+}
+
def : FPMed3Pat<f32, V_MED3_F32>;
let OtherPredicates = [isGFX9] in {
def : FP16Med3Pat<f16, V_MED3_F16>;
-def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
-def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
+defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
+defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
} // End Predicates = [isGFX9]
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td b/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
index 7b7cf1635050..e51ff4b4bc50 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
@@ -16,36 +16,4 @@
let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
- // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
- def int_SI_tbuffer_store : Intrinsic <
- [],
- [llvm_anyint_ty, // rsrc(SGPR)
- llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
- llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
- llvm_i32_ty, // vaddr(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // inst_offset(imm)
- llvm_i32_ty, // dfmt(imm)
- llvm_i32_ty, // nfmt(imm)
- llvm_i32_ty, // offen(imm)
- llvm_i32_ty, // idxen(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty], // tfe(imm)
- []>;
-
- // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
- def int_SI_buffer_load_dword : Intrinsic <
- [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
- [llvm_anyint_ty, // rsrc(SGPR)
- llvm_anyint_ty, // vaddr(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // inst_offset(imm)
- llvm_i32_ty, // offen(imm)
- llvm_i32_ty, // idxen(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty], // tfe(imm)
- [IntrReadMem, IntrArgMemOnly]>;
-
} // End TargetPrefix = "SI", isTarget = 1
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 4b537540046f..be291b127301 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -20,6 +20,26 @@
// ==>
// s_buffer_load_dwordx2 s[4:5], s[0:3], 4
//
+// This pass also tries to promote a constant offset into the instruction's
+// immediate field by adjusting the base. It looks for a base among nearby
+// instructions that keeps the resulting constant offset within 13 bits, and
+// then folds that offset into the immediate.
+// E.g.
+// s_movk_i32 s0, 0x1800
+// v_add_co_u32_e32 v0, vcc, s0, v2
+// v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+//
+// s_movk_i32 s0, 0x1000
+// v_add_co_u32_e32 v5, vcc, s0, v2
+// v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+// global_load_dwordx2 v[5:6], v[5:6], off
+// global_load_dwordx2 v[0:1], v[0:1], off
+// =>
+// s_movk_i32 s0, 0x1000
+// v_add_co_u32_e32 v5, vcc, s0, v2
+// v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+// global_load_dwordx2 v[5:6], v[5:6], off
+// global_load_dwordx2 v[0:1], v[5:6], off offset:2048
//
// Future improvements:
//
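For context, the "13bit constant offset" above is the signed immediate field of the GFX9 global_* instructions; the pass defers the actual legality test to SITargetLowering::isLegalGlobalAddressingMode, but on such subtargets it is expected to reduce to roughly the range check sketched below (an assumption for illustration, not the pass's own code):
#include "llvm/Support/MathExtras.h"

// Sketch: could Dist be folded into a GFX9 global_* immediate offset?
// Assumes a signed 13-bit field; the pass itself asks the target lowering.
static bool fitsGlobalImmOffset(int64_t Dist) {
  return llvm::isInt<13>(Dist);
}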
@@ -43,9 +63,9 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
@@ -74,23 +94,38 @@ using namespace llvm;
#define DEBUG_TYPE "si-load-store-opt"
namespace {
+enum InstClassEnum {
+ UNKNOWN,
+ DS_READ,
+ DS_WRITE,
+ S_BUFFER_LOAD_IMM,
+ BUFFER_LOAD_OFFEN = AMDGPU::BUFFER_LOAD_DWORD_OFFEN,
+ BUFFER_LOAD_OFFSET = AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+ BUFFER_STORE_OFFEN = AMDGPU::BUFFER_STORE_DWORD_OFFEN,
+ BUFFER_STORE_OFFSET = AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+ BUFFER_LOAD_OFFEN_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact,
+ BUFFER_LOAD_OFFSET_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact,
+ BUFFER_STORE_OFFEN_exact = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact,
+ BUFFER_STORE_OFFSET_exact = AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact,
+};
-class SILoadStoreOptimizer : public MachineFunctionPass {
- enum InstClassEnum {
- DS_READ_WRITE,
- S_BUFFER_LOAD_IMM,
- BUFFER_LOAD_OFFEN,
- BUFFER_LOAD_OFFSET,
- BUFFER_STORE_OFFEN,
- BUFFER_STORE_OFFSET,
- };
+enum RegisterEnum {
+ SBASE = 0x1,
+ SRSRC = 0x2,
+ SOFFSET = 0x4,
+ VADDR = 0x8,
+ ADDR = 0x10,
+};
+class SILoadStoreOptimizer : public MachineFunctionPass {
struct CombineInfo {
MachineBasicBlock::iterator I;
MachineBasicBlock::iterator Paired;
unsigned EltSize;
unsigned Offset0;
unsigned Offset1;
+ unsigned Width0;
+ unsigned Width1;
unsigned BaseOff;
InstClassEnum InstClass;
bool GLC0;
@@ -98,9 +133,23 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
bool SLC0;
bool SLC1;
bool UseST64;
- bool IsX2;
- SmallVector<MachineInstr*, 8> InstsToMove;
- };
+ SmallVector<MachineInstr *, 8> InstsToMove;
+ };
+
+ struct BaseRegisters {
+ unsigned LoReg = 0;
+ unsigned HiReg = 0;
+
+ unsigned LoSubReg = 0;
+ unsigned HiSubReg = 0;
+ };
+
+ struct MemAddress {
+ BaseRegisters Base;
+ int64_t Offset = 0;
+ };
+
+ using MemInfoMap = DenseMap<MachineInstr *, MemAddress>;
private:
const GCNSubtarget *STM = nullptr;
@@ -108,9 +157,16 @@ private:
const SIRegisterInfo *TRI = nullptr;
MachineRegisterInfo *MRI = nullptr;
AliasAnalysis *AA = nullptr;
- unsigned CreatedX2;
+ bool OptimizeAgain;
static bool offsetsCanBeCombined(CombineInfo &CI);
+ static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI);
+ static unsigned getNewOpcode(const CombineInfo &CI);
+ static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI);
+ const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI);
+ unsigned getOpcodeWidth(const MachineInstr &MI);
+ InstClassEnum getInstClass(unsigned Opc);
+ unsigned getRegs(unsigned Opc);
bool findMatchingInst(CombineInfo &CI);
@@ -123,10 +179,21 @@ private:
MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI);
MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI);
MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI);
- unsigned promoteBufferStoreOpcode(const MachineInstr &I, bool &IsX2,
- bool &IsOffen) const;
MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI);
+ void updateBaseAndOffset(MachineInstr &I, unsigned NewBase,
+ int32_t NewOffset);
+ unsigned computeBase(MachineInstr &MI, const MemAddress &Addr);
+ MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI);
+ Optional<int32_t> extractConstOffset(const MachineOperand &Op);
+ void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr);
+ /// Promotes a constant offset to the immediate by adjusting the base. It
+ /// tries to reuse a base from nearby instructions that keeps the constant
+ /// offset within 13 bits, which is then folded into the immediate.
+ bool promoteConstantOffsetToImm(MachineInstr &CI,
+ MemInfoMap &Visited,
+ SmallPtrSet<MachineInstr *, 4> &Promoted);
+
public:
static char ID;
@@ -153,8 +220,8 @@ public:
INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
"SI Load Store Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE,
- "SI Load Store Optimizer", false, false)
+INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE, "SI Load Store Optimizer",
+ false, false)
char SILoadStoreOptimizer::ID = 0;
@@ -165,7 +232,7 @@ FunctionPass *llvm::createSILoadStoreOptimizerPass() {
}
static void moveInstsAfter(MachineBasicBlock::iterator I,
- ArrayRef<MachineInstr*> InstsToMove) {
+ ArrayRef<MachineInstr *> InstsToMove) {
MachineBasicBlock *MBB = I->getParent();
++I;
for (MachineInstr *MI : InstsToMove) {
@@ -191,21 +258,19 @@ static void addDefsUsesToList(const MachineInstr &MI,
static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
MachineBasicBlock::iterator B,
const SIInstrInfo *TII,
- AliasAnalysis * AA) {
+ AliasAnalysis *AA) {
// RAW or WAR - cannot reorder
// WAW - cannot reorder
// RAR - safe to reorder
return !(A->mayStore() || B->mayStore()) ||
- TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
+ TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
}
// Add MI and its defs to the lists if MI reads one of the defs that are
// already in the list. Returns true in that case.
-static bool
-addToListsIfDependent(MachineInstr &MI,
- DenseSet<unsigned> &RegDefs,
- DenseSet<unsigned> &PhysRegUses,
- SmallVectorImpl<MachineInstr*> &Insts) {
+static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
+ DenseSet<unsigned> &PhysRegUses,
+ SmallVectorImpl<MachineInstr *> &Insts) {
for (MachineOperand &Use : MI.operands()) {
// If one of the defs is read, then there is a use of Def between I and the
// instruction that I will potentially be merged with. We will need to move
@@ -228,18 +293,16 @@ addToListsIfDependent(MachineInstr &MI,
return false;
}
-static bool
-canMoveInstsAcrossMemOp(MachineInstr &MemOp,
- ArrayRef<MachineInstr*> InstsToMove,
- const SIInstrInfo *TII,
- AliasAnalysis *AA) {
+static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
+ ArrayRef<MachineInstr *> InstsToMove,
+ const SIInstrInfo *TII, AliasAnalysis *AA) {
assert(MemOp.mayLoadOrStore());
for (MachineInstr *InstToMove : InstsToMove) {
if (!InstToMove->mayLoadOrStore())
continue;
if (!memAccessesCanBeReordered(MemOp, *InstToMove, TII, AA))
- return false;
+ return false;
}
return true;
}
@@ -260,10 +323,9 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
CI.BaseOff = 0;
// Handle SMEM and VMEM instructions.
- if (CI.InstClass != DS_READ_WRITE) {
- unsigned Diff = CI.IsX2 ? 2 : 1;
- return (EltOffset0 + Diff == EltOffset1 ||
- EltOffset1 + Diff == EltOffset0) &&
+ if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
+ return (EltOffset0 + CI.Width0 == EltOffset1 ||
+ EltOffset1 + CI.Width1 == EltOffset0) &&
CI.GLC0 == CI.GLC1 &&
(CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
}
@@ -305,42 +367,176 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
return false;
}
+bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
+ const CombineInfo &CI) {
+ const unsigned Width = (CI.Width0 + CI.Width1);
+ switch (CI.InstClass) {
+ default:
+ return (Width <= 4) && (STM.hasDwordx3LoadStores() || (Width != 3));
+ case S_BUFFER_LOAD_IMM:
+ switch (Width) {
+ default:
+ return false;
+ case 2:
+ case 4:
+ return true;
+ }
+ }
+}
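Spelled out as a plain predicate, the MUBUF branch of widthsFit behaves as in the sketch below (hasDwordx3 standing in for STM.hasDwordx3LoadStores()); for example, merging a dword access with a dwordx2 access gives a combined width of 3, which is accepted only when dwordx3 loads/stores exist:
// Sketch of the default (buffer) case above.
static bool bufferWidthsFit(unsigned Width0, unsigned Width1, bool hasDwordx3) {
  const unsigned Width = Width0 + Width1;   // e.g. dword + dwordx2 -> 3
  return Width <= 4 && (hasDwordx3 || Width != 3);
}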
+
+unsigned SILoadStoreOptimizer::getOpcodeWidth(const MachineInstr &MI) {
+ const unsigned Opc = MI.getOpcode();
+
+ if (TII->isMUBUF(MI)) {
+ return AMDGPU::getMUBUFDwords(Opc);
+ }
+
+ switch (Opc) {
+ default:
+ return 0;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ return 1;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ return 2;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return 4;
+ }
+}
+
+InstClassEnum SILoadStoreOptimizer::getInstClass(unsigned Opc) {
+ if (TII->isMUBUF(Opc)) {
+ const int baseOpcode = AMDGPU::getMUBUFBaseOpcode(Opc);
+
+ // If we couldn't identify the opcode, bail out.
+ if (baseOpcode == -1) {
+ return UNKNOWN;
+ }
+
+ switch (baseOpcode) {
+ default:
+ return UNKNOWN;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
+ return BUFFER_LOAD_OFFEN;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
+ return BUFFER_LOAD_OFFSET;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
+ return BUFFER_STORE_OFFEN;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
+ return BUFFER_STORE_OFFSET;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact:
+ return BUFFER_LOAD_OFFEN_exact;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact:
+ return BUFFER_LOAD_OFFSET_exact;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
+ return BUFFER_STORE_OFFEN_exact;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
+ return BUFFER_STORE_OFFSET_exact;
+ }
+ }
+
+ switch (Opc) {
+ default:
+ return UNKNOWN;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return S_BUFFER_LOAD_IMM;
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_B64:
+ case AMDGPU::DS_READ_B32_gfx9:
+ case AMDGPU::DS_READ_B64_gfx9:
+ return DS_READ;
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B64:
+ case AMDGPU::DS_WRITE_B32_gfx9:
+ case AMDGPU::DS_WRITE_B64_gfx9:
+ return DS_WRITE;
+ }
+}
+
+unsigned SILoadStoreOptimizer::getRegs(unsigned Opc) {
+ if (TII->isMUBUF(Opc)) {
+ unsigned result = 0;
+
+ if (AMDGPU::getMUBUFHasVAddr(Opc)) {
+ result |= VADDR;
+ }
+
+ if (AMDGPU::getMUBUFHasSrsrc(Opc)) {
+ result |= SRSRC;
+ }
+
+ if (AMDGPU::getMUBUFHasSoffset(Opc)) {
+ result |= SOFFSET;
+ }
+
+ return result;
+ }
+
+ switch (Opc) {
+ default:
+ return 0;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return SBASE;
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_B64:
+ case AMDGPU::DS_READ_B32_gfx9:
+ case AMDGPU::DS_READ_B64_gfx9:
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B64:
+ case AMDGPU::DS_WRITE_B32_gfx9:
+ case AMDGPU::DS_WRITE_B64_gfx9:
+ return ADDR;
+ }
+}
+
bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
MachineBasicBlock::iterator E = MBB->end();
MachineBasicBlock::iterator MBBI = CI.I;
- unsigned AddrOpName[3] = {0};
- int AddrIdx[3];
- const MachineOperand *AddrReg[3];
+ const unsigned Opc = CI.I->getOpcode();
+ const InstClassEnum InstClass = getInstClass(Opc);
+
+ if (InstClass == UNKNOWN) {
+ return false;
+ }
+
+ const unsigned Regs = getRegs(Opc);
+
+ unsigned AddrOpName[5] = {0};
+ int AddrIdx[5];
+ const MachineOperand *AddrReg[5];
unsigned NumAddresses = 0;
- switch (CI.InstClass) {
- case DS_READ_WRITE:
+ if (Regs & ADDR) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::addr;
- break;
- case S_BUFFER_LOAD_IMM:
+ }
+
+ if (Regs & SBASE) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase;
- break;
- case BUFFER_LOAD_OFFEN:
- case BUFFER_STORE_OFFEN:
- AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
- AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
- AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
- break;
- case BUFFER_LOAD_OFFSET:
- case BUFFER_STORE_OFFSET:
+ }
+
+ if (Regs & SRSRC) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
+ }
+
+ if (Regs & SOFFSET) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
- break;
+ }
+
+ if (Regs & VADDR) {
+ AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
}
for (unsigned i = 0; i < NumAddresses; i++) {
AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]);
AddrReg[i] = &CI.I->getOperand(AddrIdx[i]);
- // We only ever merge operations with the same base address register, so don't
- // bother scanning forward if there are no other uses.
+ // We only ever merge operations with the same base address register, so
+ // don't bother scanning forward if there are no other uses.
if (AddrReg[i]->isReg() &&
(TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) ||
MRI->hasOneNonDBGUse(AddrReg[i]->getReg())))
@@ -353,8 +549,11 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
DenseSet<unsigned> PhysRegUsesToMove;
addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);
- for ( ; MBBI != E; ++MBBI) {
- if (MBBI->getOpcode() != CI.I->getOpcode()) {
+ for (; MBBI != E; ++MBBI) {
+ const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE);
+
+ if ((getInstClass(MBBI->getOpcode()) != InstClass) ||
+ (IsDS && (MBBI->getOpcode() != Opc))) {
// This is not a matching DS instruction, but we can keep looking as
// long as one of these conditions are met:
// 1. It is safe to move I down past MBBI.
@@ -368,8 +567,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
}
if (MBBI->mayLoadOrStore() &&
- (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
+ (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
+ !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
// We fail condition #1, but we may still be able to satisfy condition
// #2. Add this instruction to the move list and then we will check
// if condition #2 holds once we have selected the matching instruction.
@@ -413,8 +612,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
continue;
}
- // Check same base pointer. Be careful of subregisters, which can occur with
- // vectors of pointers.
+ // Check same base pointer. Be careful of subregisters, which can occur
+ // with vectors of pointers.
if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
Match = false;
@@ -423,13 +622,15 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
}
if (Match) {
- int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
- AMDGPU::OpName::offset);
+ int OffsetIdx =
+ AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::offset);
CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm();
+ CI.Width0 = getOpcodeWidth(*CI.I);
CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm();
+ CI.Width1 = getOpcodeWidth(*MBBI);
CI.Paired = MBBI;
- if (CI.InstClass == DS_READ_WRITE) {
+ if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) {
CI.Offset0 &= 0xffff;
CI.Offset1 &= 0xffff;
} else {
@@ -445,7 +646,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
// We also need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
// instruction.
- if (offsetsCanBeCombined(CI))
+ if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
return true;
}
@@ -472,12 +673,12 @@ unsigned SILoadStoreOptimizer::read2ST64Opcode(unsigned EltSize) const {
if (STM->ldsRequiresM0Init())
return (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
- return (EltSize == 4) ?
- AMDGPU::DS_READ2ST64_B32_gfx9 : AMDGPU::DS_READ2ST64_B64_gfx9;
+ return (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32_gfx9
+ : AMDGPU::DS_READ2ST64_B64_gfx9;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be careful, since the addresses could be subregisters themselves in weird
@@ -489,8 +690,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
unsigned NewOffset0 = CI.Offset0;
unsigned NewOffset1 = CI.Offset1;
- unsigned Opc = CI.UseST64 ?
- read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize);
+ unsigned Opc =
+ CI.UseST64 ? read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize);
unsigned SubRegIdx0 = (CI.EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
unsigned SubRegIdx1 = (CI.EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
@@ -502,39 +703,40 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
}
assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
- (NewOffset0 != NewOffset1) &&
- "Computed offset doesn't fit");
+ (NewOffset0 != NewOffset1) && "Computed offset doesn't fit");
const MCInstrDesc &Read2Desc = TII->get(Opc);
- const TargetRegisterClass *SuperRC
- = (CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
+ const TargetRegisterClass *SuperRC =
+ (CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
DebugLoc DL = CI.I->getDebugLoc();
unsigned BaseReg = AddrReg->getReg();
+ unsigned BaseSubReg = AddrReg->getSubReg();
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
- .addImm(CI.BaseOff);
+ .addImm(CI.BaseOff);
BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BaseRegFlags = RegState::Kill;
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
- .addReg(ImmReg)
- .addReg(AddrReg->getReg());
+ .addReg(ImmReg)
+ .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ BaseSubReg = 0;
}
MachineInstrBuilder Read2 =
- BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
- .addReg(BaseReg, BaseRegFlags) // addr
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+ BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
+ .addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
(void)Read2;
@@ -561,32 +763,36 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const {
if (STM->ldsRequiresM0Init())
return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
- return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32_gfx9 : AMDGPU::DS_WRITE2_B64_gfx9;
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32_gfx9
+ : AMDGPU::DS_WRITE2_B64_gfx9;
}
unsigned SILoadStoreOptimizer::write2ST64Opcode(unsigned EltSize) const {
if (STM->ldsRequiresM0Init())
- return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32
+ : AMDGPU::DS_WRITE2ST64_B64;
- return (EltSize == 4) ?
- AMDGPU::DS_WRITE2ST64_B32_gfx9 : AMDGPU::DS_WRITE2ST64_B64_gfx9;
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32_gfx9
+ : AMDGPU::DS_WRITE2ST64_B64_gfx9;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
// sure we preserve the subregister index and any register flags set on them.
- const MachineOperand *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
- const MachineOperand *Data0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
- const MachineOperand *Data1
- = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0);
+ const MachineOperand *AddrReg =
+ TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
+ const MachineOperand *Data0 =
+ TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
+ const MachineOperand *Data1 =
+ TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0);
unsigned NewOffset0 = CI.Offset0;
unsigned NewOffset1 = CI.Offset1;
- unsigned Opc = CI.UseST64 ?
- write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize);
+ unsigned Opc =
+ CI.UseST64 ? write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize);
if (NewOffset0 > NewOffset1) {
// Canonicalize the merged instruction so the smaller offset comes first.
@@ -595,36 +801,37 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
}
assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
- (NewOffset0 != NewOffset1) &&
- "Computed offset doesn't fit");
+ (NewOffset0 != NewOffset1) && "Computed offset doesn't fit");
const MCInstrDesc &Write2Desc = TII->get(Opc);
DebugLoc DL = CI.I->getDebugLoc();
unsigned BaseReg = AddrReg->getReg();
+ unsigned BaseSubReg = AddrReg->getSubReg();
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
- .addImm(CI.BaseOff);
+ .addImm(CI.BaseOff);
BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BaseRegFlags = RegState::Kill;
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
- .addReg(ImmReg)
- .addReg(AddrReg->getReg());
+ .addReg(ImmReg)
+ .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ BaseSubReg = 0;
}
MachineInstrBuilder Write2 =
- BuildMI(*MBB, CI.Paired, DL, Write2Desc)
- .addReg(BaseReg, BaseRegFlags) // addr
- .add(*Data0) // data0
- .add(*Data1) // data1
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+ BuildMI(*MBB, CI.Paired, DL, Write2Desc)
+ .addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
+ .add(*Data0) // data0
+ .add(*Data1) // data1
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
moveInstsAfter(Write2, CI.InstsToMove);
@@ -636,15 +843,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
return Next;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- unsigned Opcode = CI.IsX2 ? AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM :
- AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
+ const unsigned Opcode = getNewOpcode(CI);
+
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
- const TargetRegisterClass *SuperRC =
- CI.IsX2 ? &AMDGPU::SReg_128RegClass : &AMDGPU::SReg_64_XEXECRegClass;
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
@@ -652,14 +858,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
.addImm(CI.GLC0) // glc
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
-
- unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
- unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
- // Handle descending offsets
- if (CI.Offset0 > CI.Offset1)
- std::swap(SubRegIdx0, SubRegIdx1);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the old destination registers.
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
@@ -681,29 +884,25 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
return Next;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- unsigned Opcode;
- if (CI.InstClass == BUFFER_LOAD_OFFEN) {
- Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN :
- AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
- } else {
- Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET :
- AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
- }
+ const unsigned Opcode = getNewOpcode(CI);
- const TargetRegisterClass *SuperRC =
- CI.IsX2 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass;
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
+
+ // Allocate the wider destination register for the merged load.
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg);
- if (CI.InstClass == BUFFER_LOAD_OFFEN)
- MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
+ const unsigned Regs = getRegs(Opcode);
+
+ if (Regs & VADDR)
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -711,14 +910,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
.addImm(CI.GLC0) // glc
.addImm(CI.SLC0) // slc
.addImm(0) // tfe
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
-
- unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
- unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
- // Handle descending offsets
- if (CI.Offset0 > CI.Offset1)
- std::swap(SubRegIdx0, SubRegIdx1);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the old destination registers.
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
@@ -740,57 +936,137 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
return Next;
}
-unsigned SILoadStoreOptimizer::promoteBufferStoreOpcode(
- const MachineInstr &I, bool &IsX2, bool &IsOffen) const {
- IsX2 = false;
- IsOffen = false;
-
- switch (I.getOpcode()) {
- case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
- case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN_exact;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
- IsX2 = true;
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN_exact:
- IsX2 = true;
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN_exact;
- case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
- case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET_exact;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
- IsX2 = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET_exact:
- IsX2 = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET_exact;
- }
- return 0;
-}
-
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
- CombineInfo &CI) {
+unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
+ const unsigned Width = CI.Width0 + CI.Width1;
+
+ switch (CI.InstClass) {
+ default:
+ return AMDGPU::getMUBUFOpcode(CI.InstClass, Width);
+ case UNKNOWN:
+ llvm_unreachable("Unknown instruction class");
+ case S_BUFFER_LOAD_IMM:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
+ case 4:
+ return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
+ }
+ }
+}
+
+std::pair<unsigned, unsigned>
+SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI) {
+ if (CI.Offset0 > CI.Offset1) {
+ switch (CI.Width0) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub1, AMDGPU::sub0);
+ case 2:
+ return std::make_pair(AMDGPU::sub2, AMDGPU::sub0_sub1);
+ case 3:
+ return std::make_pair(AMDGPU::sub3, AMDGPU::sub0_sub1_sub2);
+ }
+ case 2:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub1_sub2, AMDGPU::sub0);
+ case 2:
+ return std::make_pair(AMDGPU::sub2_sub3, AMDGPU::sub0_sub1);
+ }
+ case 3:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0);
+ }
+ }
+ } else {
+ switch (CI.Width0) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub0, AMDGPU::sub1);
+ case 2:
+ return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2);
+ case 3:
+ return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2_sub3);
+ }
+ case 2:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2);
+ case 2:
+ return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2_sub3);
+ }
+ case 3:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub0_sub1_sub2, AMDGPU::sub3);
+ }
+ }
+ }
+}
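A worked instance of the table above, using the same field names: merging a dword access (Width0 = 1) with a dwordx2 access (Width1 = 2) selects the subregister pair shown below, and descending offsets mirror it.
//   Offset0 < Offset1, Width0 = 1, Width1 = 2:
//     getSubRegIdxs(CI) == {AMDGPU::sub0, AMDGPU::sub1_sub2}
//     i.e. the first original def is copied out of sub0 of the merged
//     dwordx3 result and the second out of sub1_sub2.
//   Offset0 > Offset1, same widths:
//     getSubRegIdxs(CI) == {AMDGPU::sub2, AMDGPU::sub0_sub1}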
+
+const TargetRegisterClass *
+SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI) {
+ if (CI.InstClass == S_BUFFER_LOAD_IMM) {
+ switch (CI.Width0 + CI.Width1) {
+ default:
+ return nullptr;
+ case 2:
+ return &AMDGPU::SReg_64_XEXECRegClass;
+ case 4:
+ return &AMDGPU::SReg_128RegClass;
+ case 8:
+ return &AMDGPU::SReg_256RegClass;
+ case 16:
+ return &AMDGPU::SReg_512RegClass;
+ }
+ } else {
+ switch (CI.Width0 + CI.Width1) {
+ default:
+ return nullptr;
+ case 2:
+ return &AMDGPU::VReg_64RegClass;
+ case 3:
+ return &AMDGPU::VReg_96RegClass;
+ case 4:
+ return &AMDGPU::VReg_128RegClass;
+ }
+ }
+}
+
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- bool Unused1, Unused2;
- unsigned Opcode = promoteBufferStoreOpcode(*CI.I, Unused1, Unused2);
- unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
- unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;
+ const unsigned Opcode = getNewOpcode(CI);
- // Handle descending offsets
- if (CI.Offset0 > CI.Offset1)
- std::swap(SubRegIdx0, SubRegIdx1);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the new source register.
- const TargetRegisterClass *SuperRC =
- CI.IsX2 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass;
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
unsigned SrcReg = MRI->createVirtualRegister(SuperRC);
const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
@@ -803,18 +1079,20 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
.addImm(SubRegIdx1);
auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode))
- .addReg(SrcReg, RegState::Kill);
+ .addReg(SrcReg, RegState::Kill);
- if (CI.InstClass == BUFFER_STORE_OFFEN)
- MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
+ const unsigned Regs = getRegs(Opcode);
+
+ if (Regs & VADDR)
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset0, CI.Offset1)) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.SLC0) // slc
- .addImm(0) // tfe
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.SLC0) // slc
+ .addImm(0) // tfe
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
moveInstsAfter(MIB, CI.InstsToMove);
@@ -824,105 +1102,399 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
return Next;
}
+MachineOperand
+SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) {
+ APInt V(32, Val, true);
+ if (TII->isInlineConstant(V))
+ return MachineOperand::CreateImm(Val);
+
+ unsigned Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ MachineInstr *Mov =
+ BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+ TII->get(AMDGPU::S_MOV_B32), Reg)
+ .addImm(Val);
+ (void)Mov;
+ LLVM_DEBUG(dbgs() << " "; Mov->dump());
+ return MachineOperand::CreateReg(Reg, false);
+}
+
+// Compute base address using Addr and return the final register.
+unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
+ const MemAddress &Addr) {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ DebugLoc DL = MI.getDebugLoc();
+
+ assert((TRI->getRegSizeInBits(Addr.Base.LoReg, *MRI) == 32 ||
+ Addr.Base.LoSubReg) &&
+ "Expected 32-bit Base-Register-Low!!");
+
+ assert((TRI->getRegSizeInBits(Addr.Base.HiReg, *MRI) == 32 ||
+ Addr.Base.HiSubReg) &&
+ "Expected 32-bit Base-Register-Hi!!");
+
+ LLVM_DEBUG(dbgs() << " Re-Computed Anchor-Base:\n");
+ MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
+ MachineOperand OffsetHi =
+ createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
+ unsigned CarryReg = MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned DeadCarryReg =
+ MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+ unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MachineInstr *LoHalf =
+ BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
+ .addReg(CarryReg, RegState::Define)
+ .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
+ .add(OffsetLo);
+ (void)LoHalf;
+ LLVM_DEBUG(dbgs() << " "; LoHalf->dump(););
+
+ MachineInstr *HiHalf =
+ BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADDC_U32_e64), DestSub1)
+ .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
+ .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
+ .add(OffsetHi)
+ .addReg(CarryReg, RegState::Kill);
+ (void)HiHalf;
+ LLVM_DEBUG(dbgs() << " "; HiHalf->dump(););
+
+ unsigned FullDestReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ MachineInstr *FullBase =
+ BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+ (void)FullBase;
+ LLVM_DEBUG(dbgs() << " "; FullBase->dump(); dbgs() << "\n";);
+
+ return FullDestReg;
+}
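Roughly, for an anchor offset small enough to be an inline constant (say Addr.Offset = 48), computeBase emits the MIR sketched below; register names are illustrative, and larger offsets get an extra S_MOV_B32 from createRegOrImm.
//   %lo:vgpr_32, %carry:sreg_64_xexec = V_ADD_I32_e64 %base_lo, 48
//   %hi:vgpr_32, %dead:sreg_64_xexec  = V_ADDC_U32_e64 %base_hi, 0, %carry
//   %newbase:vreg_64 = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1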
+
+// Update base and offset with the NewBase and NewOffset in MI.
+void SILoadStoreOptimizer::updateBaseAndOffset(MachineInstr &MI,
+ unsigned NewBase,
+ int32_t NewOffset) {
+ TII->getNamedOperand(MI, AMDGPU::OpName::vaddr)->setReg(NewBase);
+ TII->getNamedOperand(MI, AMDGPU::OpName::offset)->setImm(NewOffset);
+}
+
+Optional<int32_t>
+SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) {
+ if (Op.isImm())
+ return Op.getImm();
+
+ if (!Op.isReg())
+ return None;
+
+ MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
+ if (!Def || Def->getOpcode() != AMDGPU::S_MOV_B32 ||
+ !Def->getOperand(1).isImm())
+ return None;
+
+ return Def->getOperand(1).getImm();
+}
+
+// Analyze Base and extracts:
+// - 32bit base registers, subregisters
+// - 64bit constant offset
+// Expecting base computation as:
+// %OFFSET0:sgpr_32 = S_MOV_B32 8000
+// %LO:vgpr_32, %c:sreg_64_xexec =
+// V_ADD_I32_e64 %BASE_LO:vgpr_32, %103:sgpr_32,
+// %HI:vgpr_32, = V_ADDC_U32_e64 %BASE_HI:vgpr_32, 0, killed %c:sreg_64_xexec
+// %Base:vreg_64 =
+// REG_SEQUENCE %LO:vgpr_32, %subreg.sub0, %HI:vgpr_32, %subreg.sub1
+void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base,
+ MemAddress &Addr) {
+ if (!Base.isReg())
+ return;
+
+ MachineInstr *Def = MRI->getUniqueVRegDef(Base.getReg());
+ if (!Def || Def->getOpcode() != AMDGPU::REG_SEQUENCE
+ || Def->getNumOperands() != 5)
+ return;
+
+ MachineOperand BaseLo = Def->getOperand(1);
+ MachineOperand BaseHi = Def->getOperand(3);
+ if (!BaseLo.isReg() || !BaseHi.isReg())
+ return;
+
+ MachineInstr *BaseLoDef = MRI->getUniqueVRegDef(BaseLo.getReg());
+ MachineInstr *BaseHiDef = MRI->getUniqueVRegDef(BaseHi.getReg());
+
+ if (!BaseLoDef || BaseLoDef->getOpcode() != AMDGPU::V_ADD_I32_e64 ||
+ !BaseHiDef || BaseHiDef->getOpcode() != AMDGPU::V_ADDC_U32_e64)
+ return;
+
+ const auto *Src0 = TII->getNamedOperand(*BaseLoDef, AMDGPU::OpName::src0);
+ const auto *Src1 = TII->getNamedOperand(*BaseLoDef, AMDGPU::OpName::src1);
+
+ auto Offset0P = extractConstOffset(*Src0);
+ if (Offset0P)
+ BaseLo = *Src1;
+ else {
+ if (!(Offset0P = extractConstOffset(*Src1)))
+ return;
+ BaseLo = *Src0;
+ }
+
+ Src0 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src0);
+ Src1 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src1);
+
+ if (Src0->isImm())
+ std::swap(Src0, Src1);
+
+ if (!Src1->isImm())
+ return;
+
+ uint64_t Offset1 = Src1->getImm();
+ BaseHi = *Src0;
+
+ Addr.Base.LoReg = BaseLo.getReg();
+ Addr.Base.HiReg = BaseHi.getReg();
+ Addr.Base.LoSubReg = BaseLo.getSubReg();
+ Addr.Base.HiSubReg = BaseHi.getSubReg();
+ Addr.Offset = (*Offset0P & 0x00000000ffffffff) | (Offset1 << 32);
+}
+
+bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
+ MachineInstr &MI,
+ MemInfoMap &Visited,
+ SmallPtrSet<MachineInstr *, 4> &AnchorList) {
+
+ // TODO: Support flat and scratch.
+ if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0 ||
+ TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL)
+ return false;
+
+ // TODO: Support Store.
+ if (!MI.mayLoad())
+ return false;
+
+ if (AnchorList.count(&MI))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\nTryToPromoteConstantOffsetToImmFor "; MI.dump());
+
+ if (TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm()) {
+ LLVM_DEBUG(dbgs() << " Const-offset is already promoted.\n";);
+ return false;
+ }
+
+ // Step1: Find the base-registers and a 64bit constant offset.
+ MachineOperand &Base = *TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
+ MemAddress MAddr;
+ if (Visited.find(&MI) == Visited.end()) {
+ processBaseWithConstOffset(Base, MAddr);
+ Visited[&MI] = MAddr;
+ } else
+ MAddr = Visited[&MI];
+
+ if (MAddr.Offset == 0) {
+ LLVM_DEBUG(dbgs() << " Failed to extract constant-offset or there are no"
+ " constant offsets that can be promoted.\n";);
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << " BASE: {" << MAddr.Base.HiReg << ", "
+ << MAddr.Base.LoReg << "} Offset: " << MAddr.Offset << "\n\n";);
+
+ // Step2: Traverse through MI's basic block and find an anchor (an access with
+ // the same base registers) at the highest 13-bit distance from MI's offset.
+ // E.g. (64bit loads)
+ // bb:
+ // addr1 = &a + 4096; load1 = load(addr1, 0)
+ // addr2 = &a + 6144; load2 = load(addr2, 0)
+ // addr3 = &a + 8192; load3 = load(addr3, 0)
+ // addr4 = &a + 10240; load4 = load(addr4, 0)
+ // addr5 = &a + 12288; load5 = load(addr5, 0)
+ //
+ // Starting from the first load, the optimization will try to find a new base
+ // from which (&a + 4096) has a 13-bit distance. Both &a + 6144 and &a + 8192
+ // have a 13-bit distance from &a + 4096. The heuristic picks &a + 8192 as the
+ // new base (anchor) because the larger distance can presumably accommodate
+ // more intermediate bases.
+ //
+ // Step3: move (&a + 8192) above load1. Compute and promote offsets from
+ // (&a + 8192) for load1, load2, load4.
+ // addr = &a + 8192
+ // load1 = load(addr, -4096)
+ // load2 = load(addr, -2048)
+ // load3 = load(addr, 0)
+ // load4 = load(addr, 2048)
+ // addr5 = &a + 12288; load5 = load(addr5, 0)
+ //
+ MachineInstr *AnchorInst = nullptr;
+ MemAddress AnchorAddr;
+ uint32_t MaxDist = std::numeric_limits<uint32_t>::min();
+ SmallVector<std::pair<MachineInstr *, int64_t>, 4> InstsWCommonBase;
+
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator E = MBB->end();
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ ++MBBI;
+ const SITargetLowering *TLI =
+ static_cast<const SITargetLowering *>(STM->getTargetLowering());
+
+ for ( ; MBBI != E; ++MBBI) {
+ MachineInstr &MINext = *MBBI;
+ // TODO: Support finding an anchor (with the same base) from store addresses or
+ // any other load addresses where the opcodes are different.
+ if (MINext.getOpcode() != MI.getOpcode() ||
+ TII->getNamedOperand(MINext, AMDGPU::OpName::offset)->getImm())
+ continue;
+
+ const MachineOperand &BaseNext =
+ *TII->getNamedOperand(MINext, AMDGPU::OpName::vaddr);
+ MemAddress MAddrNext;
+ if (Visited.find(&MINext) == Visited.end()) {
+ processBaseWithConstOffset(BaseNext, MAddrNext);
+ Visited[&MINext] = MAddrNext;
+ } else
+ MAddrNext = Visited[&MINext];
+
+ if (MAddrNext.Base.LoReg != MAddr.Base.LoReg ||
+ MAddrNext.Base.HiReg != MAddr.Base.HiReg ||
+ MAddrNext.Base.LoSubReg != MAddr.Base.LoSubReg ||
+ MAddrNext.Base.HiSubReg != MAddr.Base.HiSubReg)
+ continue;
+
+ InstsWCommonBase.push_back(std::make_pair(&MINext, MAddrNext.Offset));
+
+ int64_t Dist = MAddr.Offset - MAddrNext.Offset;
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = Dist;
+ if (TLI->isLegalGlobalAddressingMode(AM) &&
+ (uint32_t)std::abs(Dist) > MaxDist) {
+ MaxDist = std::abs(Dist);
+
+ AnchorAddr = MAddrNext;
+ AnchorInst = &MINext;
+ }
+ }
+
+ if (AnchorInst) {
+ LLVM_DEBUG(dbgs() << " Anchor-Inst(with max-distance from Offset): ";
+ AnchorInst->dump());
+ LLVM_DEBUG(dbgs() << " Anchor-Offset from BASE: "
+ << AnchorAddr.Offset << "\n\n");
+
+ // Instead of moving up, just re-compute anchor-instruction's base address.
+ unsigned Base = computeBase(MI, AnchorAddr);
+
+ updateBaseAndOffset(MI, Base, MAddr.Offset - AnchorAddr.Offset);
+ LLVM_DEBUG(dbgs() << " After promotion: "; MI.dump(););
+
+ for (auto P : InstsWCommonBase) {
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = P.second - AnchorAddr.Offset;
+
+ if (TLI->isLegalGlobalAddressingMode(AM)) {
+ LLVM_DEBUG(dbgs() << " Promote Offset(" << P.second;
+ dbgs() << ")"; P.first->dump());
+ updateBaseAndOffset(*P.first, Base, P.second - AnchorAddr.Offset);
+ LLVM_DEBUG(dbgs() << " After promotion: "; P.first->dump());
+ }
+ }
+ AnchorList.insert(AnchorInst);
+ return true;
+ }
+
+ return false;
+}
+
// Scan through looking for adjacent LDS operations with constant offsets from
// the same base register. We rely on the scheduler to do the hard work of
// clustering nearby loads, and assume these are all adjacent.
bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
bool Modified = false;
+ // Maps each instruction already visited to its decomposed address (base
+ // registers and constant offset).
+ MemInfoMap Visited;
+ // Contains the list of instructions for which constant offsets are being
+ // promoted to the immediate (IMM) operand.
+ SmallPtrSet<MachineInstr *, 4> AnchorList;
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
MachineInstr &MI = *I;
+ if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
+ Modified = true;
+
// Don't combine if volatile.
if (MI.hasOrderedMemoryRef()) {
++I;
continue;
}
+ const unsigned Opc = MI.getOpcode();
+
CombineInfo CI;
CI.I = I;
- unsigned Opc = MI.getOpcode();
- if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64 ||
- Opc == AMDGPU::DS_READ_B32_gfx9 || Opc == AMDGPU::DS_READ_B64_gfx9) {
+ CI.InstClass = getInstClass(Opc);
- CI.InstClass = DS_READ_WRITE;
+ switch (CI.InstClass) {
+ default:
+ break;
+ case DS_READ:
CI.EltSize =
- (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 : 4;
-
+ (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8
+ : 4;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeRead2Pair(CI);
} else {
++I;
}
-
continue;
- } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64 ||
- Opc == AMDGPU::DS_WRITE_B32_gfx9 ||
- Opc == AMDGPU::DS_WRITE_B64_gfx9) {
- CI.InstClass = DS_READ_WRITE;
- CI.EltSize
- = (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8 : 4;
-
+ case DS_WRITE:
+ CI.EltSize =
+ (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8
+ : 4;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeWrite2Pair(CI);
} else {
++I;
}
-
continue;
- }
- if (Opc == AMDGPU::S_BUFFER_LOAD_DWORD_IMM ||
- Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM) {
- // EltSize is in units of the offset encoding.
- CI.InstClass = S_BUFFER_LOAD_IMM;
+ case S_BUFFER_LOAD_IMM:
CI.EltSize = AMDGPU::getSMRDEncodedOffset(*STM, 4);
- CI.IsX2 = Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeSBufferLoadImmPair(CI);
- if (!CI.IsX2)
- CreatedX2++;
+ OptimizeAgain |= (CI.Width0 + CI.Width1) < 16;
} else {
++I;
}
continue;
- }
- if (Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFSET ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET) {
- if (Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN)
- CI.InstClass = BUFFER_LOAD_OFFEN;
- else
- CI.InstClass = BUFFER_LOAD_OFFSET;
-
+ case BUFFER_LOAD_OFFEN:
+ case BUFFER_LOAD_OFFSET:
+ case BUFFER_LOAD_OFFEN_exact:
+ case BUFFER_LOAD_OFFSET_exact:
CI.EltSize = 4;
- CI.IsX2 = Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeBufferLoadPair(CI);
- if (!CI.IsX2)
- CreatedX2++;
+ OptimizeAgain |= (CI.Width0 + CI.Width1) < 4;
} else {
++I;
}
continue;
- }
-
- bool StoreIsX2, IsOffen;
- if (promoteBufferStoreOpcode(*I, StoreIsX2, IsOffen)) {
- CI.InstClass = IsOffen ? BUFFER_STORE_OFFEN : BUFFER_STORE_OFFSET;
+ case BUFFER_STORE_OFFEN:
+ case BUFFER_STORE_OFFSET:
+ case BUFFER_STORE_OFFEN_exact:
+ case BUFFER_STORE_OFFSET_exact:
CI.EltSize = 4;
- CI.IsX2 = StoreIsX2;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeBufferStorePair(CI);
- if (!CI.IsX2)
- CreatedX2++;
+ OptimizeAgain |= (CI.Width0 + CI.Width1) < 4;
} else {
++I;
}
@@ -956,12 +1528,10 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
bool Modified = false;
for (MachineBasicBlock &MBB : MF) {
- CreatedX2 = 0;
- Modified |= optimizeBlock(MBB);
-
- // Run again to convert x2 to x4.
- if (CreatedX2 >= 1)
+ do {
+ OptimizeAgain = false;
Modified |= optimizeBlock(MBB);
+ } while (OptimizeAgain);
}
return Modified;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index ad30317c344c..1aa1feebbdae 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -85,9 +85,7 @@ private:
void emitIf(MachineInstr &MI);
void emitElse(MachineInstr &MI);
- void emitBreak(MachineInstr &MI);
void emitIfBreak(MachineInstr &MI);
- void emitElseBreak(MachineInstr &MI);
void emitLoop(MachineInstr &MI);
void emitEndCf(MachineInstr &MI);
@@ -329,20 +327,6 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
}
-void SILowerControlFlow::emitBreak(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- const DebugLoc &DL = MI.getDebugLoc();
- unsigned Dst = MI.getOperand(0).getReg();
-
- MachineInstr *Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(AMDGPU::EXEC)
- .add(MI.getOperand(1));
-
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *Or);
- MI.eraseFromParent();
-}
-
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
@@ -384,11 +368,6 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
MI.eraseFromParent();
}
-void SILowerControlFlow::emitElseBreak(MachineInstr &MI) {
- // Lowered in the same way as emitIfBreak above.
- emitIfBreak(MI);
-}
-
void SILowerControlFlow::emitLoop(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
@@ -515,18 +494,10 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
emitElse(MI);
break;
- case AMDGPU::SI_BREAK:
- emitBreak(MI);
- break;
-
case AMDGPU::SI_IF_BREAK:
emitIfBreak(MI);
break;
- case AMDGPU::SI_ELSE_BREAK:
- emitElseBreak(MI);
- break;
-
case AMDGPU::SI_LOOP:
emitLoop(MI);
break;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index ecc6cff407e1..eb038bb5d5fc 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -5,37 +5,61 @@
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-/// i1 values are usually inserted by the CFG Structurize pass and they are
-/// unique in that they can be copied from VALU to SALU registers.
-/// This is not possible for any other value type. Since there are no
-/// MOV instructions for i1, we to use V_CMP_* and V_CNDMASK to move the i1.
-///
//===----------------------------------------------------------------------===//
//
+// This pass lowers all occurrences of i1 values (with a vreg_1 register class)
+// to lane masks (64-bit scalar registers). The pass assumes machine SSA form
+// and a wave-level control flow graph.
+//
+// Before this pass, values that are semantically i1 and are defined and used
+// within the same basic block are already represented as lane masks in scalar
+// registers. However, values that cross basic blocks are always transferred
+// between basic blocks in vreg_1 virtual registers and are lowered by this
+// pass.
+//
+// The only instructions that use or define vreg_1 virtual registers are COPY,
+// PHI, and IMPLICIT_DEF.
+//
+//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "si-i1-copies"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPULaneDominator.h"
-#include "llvm/CodeGen/LiveIntervals.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
+#define DEBUG_TYPE "si-i1-copies"
+
using namespace llvm;
+static unsigned createLaneMaskReg(MachineFunction &MF);
+static unsigned insertUndefLaneMask(MachineBasicBlock &MBB);
+
namespace {
class SILowerI1Copies : public MachineFunctionPass {
public:
static char ID;
+private:
+ MachineFunction *MF = nullptr;
+ MachineDominatorTree *DT = nullptr;
+ MachinePostDominatorTree *PDT = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const GCNSubtarget *ST = nullptr;
+ const SIInstrInfo *TII = nullptr;
+
+ DenseSet<unsigned> ConstrainRegs;
+
public:
SILowerI1Copies() : MachineFunctionPass(ID) {
initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
@@ -47,14 +71,337 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+private:
+ void lowerCopiesFromI1();
+ void lowerPhis();
+ void lowerCopiesToI1();
+ bool isConstantLaneMask(unsigned Reg, bool &Val) const;
+ void buildMergeLaneMasks(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ unsigned DstReg, unsigned PrevReg, unsigned CurReg);
+ MachineBasicBlock::iterator
+ getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+
+ bool isLaneMaskReg(unsigned Reg) const {
+ return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
+ TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
+ ST->getWavefrontSize();
+ }
+};
+
+/// Helper class that determines the relationship between incoming values of a
+/// phi in the control flow graph to determine where an incoming value can
+/// simply be taken as a scalar lane mask as-is, and where it needs to be
+/// merged with another, previously defined lane mask.
+///
+/// The approach is as follows:
+/// - Determine all basic blocks which, starting from the incoming blocks,
+/// a wave may reach before entering the def block (the block containing the
+/// phi).
+/// - If an incoming block has no predecessors in this set, we can take the
+/// incoming value as a scalar lane mask as-is.
+/// -- A special case of this is when the def block has a self-loop.
+/// - Otherwise, the incoming value needs to be merged with a previously
+/// defined lane mask.
+/// - If there is a path into the set of reachable blocks that does _not_ go
+/// through an incoming block where we can take the scalar lane mask as-is,
+/// we need to invent an available value for the SSAUpdater. Choices are
+/// 0 and undef, with differing consequences for how to merge values etc.
+///
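+/// For example (illustrative): for a phi in the join block of a simple
+/// if/else diamond, neither incoming block has a predecessor inside the
+/// reachable set, so both incoming lane masks can be taken as-is and no merge
+/// is needed.
+///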
+/// TODO: We could use region analysis to quickly skip over SESE regions during
+/// the traversal.
+///
+class PhiIncomingAnalysis {
+ MachinePostDominatorTree &PDT;
+
+ // For each reachable basic block, whether it is a source in the induced
+ // subgraph of the CFG.
+ DenseMap<MachineBasicBlock *, bool> ReachableMap;
+ SmallVector<MachineBasicBlock *, 4> ReachableOrdered;
+ SmallVector<MachineBasicBlock *, 4> Stack;
+ SmallVector<MachineBasicBlock *, 4> Predecessors;
+
+public:
+ PhiIncomingAnalysis(MachinePostDominatorTree &PDT) : PDT(PDT) {}
+
+ /// Returns whether \p MBB is a source in the induced subgraph of reachable
+ /// blocks.
+ bool isSource(MachineBasicBlock &MBB) const {
+ return ReachableMap.find(&MBB)->second;
+ }
+
+ ArrayRef<MachineBasicBlock *> predecessors() const { return Predecessors; }
+
+ void analyze(MachineBasicBlock &DefBlock,
+ ArrayRef<MachineBasicBlock *> IncomingBlocks) {
+ assert(Stack.empty());
+ ReachableMap.clear();
+ ReachableOrdered.clear();
+ Predecessors.clear();
+
+ // Insert the def block first, so that it acts as an end point for the
+ // traversal.
+ ReachableMap.try_emplace(&DefBlock, false);
+ ReachableOrdered.push_back(&DefBlock);
+
+ for (MachineBasicBlock *MBB : IncomingBlocks) {
+ if (MBB == &DefBlock) {
+ ReachableMap[&DefBlock] = true; // self-loop on DefBlock
+ continue;
+ }
+
+ ReachableMap.try_emplace(MBB, false);
+ ReachableOrdered.push_back(MBB);
+
+ // If this block has a divergent terminator and the def block is its
+ // post-dominator, the wave may first visit the other successors.
+ bool Divergent = false;
+ for (MachineInstr &MI : MBB->terminators()) {
+ if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
+ MI.getOpcode() == AMDGPU::SI_IF ||
+ MI.getOpcode() == AMDGPU::SI_ELSE ||
+ MI.getOpcode() == AMDGPU::SI_LOOP) {
+ Divergent = true;
+ break;
+ }
+ }
+
+ if (Divergent && PDT.dominates(&DefBlock, MBB)) {
+ for (MachineBasicBlock *Succ : MBB->successors())
+ Stack.push_back(Succ);
+ }
+ }
+
+ while (!Stack.empty()) {
+ MachineBasicBlock *MBB = Stack.pop_back_val();
+ if (!ReachableMap.try_emplace(MBB, false).second)
+ continue;
+ ReachableOrdered.push_back(MBB);
+
+ for (MachineBasicBlock *Succ : MBB->successors())
+ Stack.push_back(Succ);
+ }
+
+ for (MachineBasicBlock *MBB : ReachableOrdered) {
+ bool HaveReachablePred = false;
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (ReachableMap.count(Pred)) {
+ HaveReachablePred = true;
+ } else {
+ Stack.push_back(Pred);
+ }
+ }
+ if (!HaveReachablePred)
+ ReachableMap[MBB] = true;
+ if (HaveReachablePred) {
+ for (MachineBasicBlock *UnreachablePred : Stack) {
+ if (llvm::find(Predecessors, UnreachablePred) == Predecessors.end())
+ Predecessors.push_back(UnreachablePred);
+ }
+ }
+ Stack.clear();
+ }
+ }
+};
+
+/// Helper class that detects loops which require us to lower an i1 COPY into
+/// bitwise manipulation.
+///
+/// Unfortunately, we cannot use LoopInfo because LoopInfo does not distinguish
+/// between loops with the same header. Consider this example:
+///
+/// A-+-+
+/// | | |
+/// B-+ |
+/// | |
+/// C---+
+///
+/// A is the header of a loop containing A, B, and C as far as LoopInfo is
+/// concerned. However, an i1 COPY in B that is used in C must be lowered to
+/// bitwise operations to combine results from different loop iterations when
+/// B has a divergent branch (since by default we will compile this code such
+/// that threads in a wave are merged at the entry of C).
+///
+/// The following rule is implemented to determine whether bitwise operations
+/// are required: use the bitwise lowering for a def in block B if a backward
+/// edge to B is reachable without going through the nearest common
+/// post-dominator of B and all uses of the def.
+///
+/// TODO: This rule is conservative because it does not check whether the
+/// relevant branches are actually divergent.
+///
+/// The class is designed to cache the CFG traversal so that it can be re-used
+/// for multiple defs within the same basic block.
+///
+/// TODO: We could use region analysis to quickly skip over SESE regions during
+/// the traversal.
+///
+class LoopFinder {
+ MachineDominatorTree &DT;
+ MachinePostDominatorTree &PDT;
+
+ // All visited / reachable blocks, tagged by level (level 0 is the def block,
+ // level 1 are all blocks reachable including but not going through the def
+ // block's IPDOM, etc.).
+ DenseMap<MachineBasicBlock *, unsigned> Visited;
+
+ // Nearest common dominator of all visited blocks by level (level 0 is the
+ // def block). Used for seeding the SSAUpdater.
+ SmallVector<MachineBasicBlock *, 4> CommonDominators;
+
+ // Post-dominator of all visited blocks.
+ MachineBasicBlock *VisitedPostDom = nullptr;
+
+ // Level at which a loop was found: 0 is not possible; 1 = a backward edge is
+ // reachable without going through the IPDOM of the def block (if the IPDOM
+ // itself has an edge to the def block, the loop level is 2), etc.
+ unsigned FoundLoopLevel = ~0u;
+
+ MachineBasicBlock *DefBlock = nullptr;
+ SmallVector<MachineBasicBlock *, 4> Stack;
+ SmallVector<MachineBasicBlock *, 4> NextLevel;
+
+public:
+ LoopFinder(MachineDominatorTree &DT, MachinePostDominatorTree &PDT)
+ : DT(DT), PDT(PDT) {}
+
+ void initialize(MachineBasicBlock &MBB) {
+ Visited.clear();
+ CommonDominators.clear();
+ Stack.clear();
+ NextLevel.clear();
+ VisitedPostDom = nullptr;
+ FoundLoopLevel = ~0u;
+
+ DefBlock = &MBB;
+ }
+
+ /// Check whether a backward edge can be reached without going through the
+ /// given \p PostDom of the def block.
+ ///
+ /// Return the level of \p PostDom if a loop was found, or 0 otherwise.
+ unsigned findLoop(MachineBasicBlock *PostDom) {
+ MachineDomTreeNode *PDNode = PDT.getNode(DefBlock);
+
+ if (!VisitedPostDom)
+ advanceLevel();
+
+ unsigned Level = 0;
+ while (PDNode->getBlock() != PostDom) {
+ if (PDNode->getBlock() == VisitedPostDom)
+ advanceLevel();
+ PDNode = PDNode->getIDom();
+ Level++;
+ if (FoundLoopLevel == Level)
+ return Level;
+ }
+
+ return 0;
+ }
+
+ /// Add undef values dominating the loop and the optionally given additional
+ /// blocks, so that the SSA updater doesn't have to search all the way to the
+ /// function entry.
+ void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
+ ArrayRef<MachineBasicBlock *> Blocks = {}) {
+ assert(LoopLevel < CommonDominators.size());
+
+ MachineBasicBlock *Dom = CommonDominators[LoopLevel];
+ for (MachineBasicBlock *MBB : Blocks)
+ Dom = DT.findNearestCommonDominator(Dom, MBB);
+
+ if (!inLoopLevel(*Dom, LoopLevel, Blocks)) {
+ SSAUpdater.AddAvailableValue(Dom, insertUndefLaneMask(*Dom));
+ } else {
+ // The dominator is part of the loop or the given blocks, so add the
+ // undef value to unreachable predecessors instead.
+ for (MachineBasicBlock *Pred : Dom->predecessors()) {
+ if (!inLoopLevel(*Pred, LoopLevel, Blocks))
+ SSAUpdater.AddAvailableValue(Pred, insertUndefLaneMask(*Pred));
+ }
+ }
+ }
+
+private:
+ bool inLoopLevel(MachineBasicBlock &MBB, unsigned LoopLevel,
+ ArrayRef<MachineBasicBlock *> Blocks) const {
+ auto DomIt = Visited.find(&MBB);
+ if (DomIt != Visited.end() && DomIt->second <= LoopLevel)
+ return true;
+
+ if (llvm::find(Blocks, &MBB) != Blocks.end())
+ return true;
+
+ return false;
+ }
+
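+ // Advance the traversal by one level: VisitedPostDom starts at the def block
+ // and moves up to its immediate post-dominator on each call; the blocks that
+ // become reachable without crossing it are visited, the nearest common
+ // dominator of everything visited is tracked, and any back edge into the def
+ // block records a found loop level.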
+ void advanceLevel() {
+ MachineBasicBlock *VisitedDom;
+
+ if (!VisitedPostDom) {
+ VisitedPostDom = DefBlock;
+ VisitedDom = DefBlock;
+ Stack.push_back(DefBlock);
+ } else {
+ VisitedPostDom = PDT.getNode(VisitedPostDom)->getIDom()->getBlock();
+ VisitedDom = CommonDominators.back();
+
+ for (unsigned i = 0; i < NextLevel.size();) {
+ if (PDT.dominates(VisitedPostDom, NextLevel[i])) {
+ Stack.push_back(NextLevel[i]);
+
+ NextLevel[i] = NextLevel.back();
+ NextLevel.pop_back();
+ } else {
+ i++;
+ }
+ }
+ }
+
+ unsigned Level = CommonDominators.size();
+ while (!Stack.empty()) {
+ MachineBasicBlock *MBB = Stack.pop_back_val();
+ if (!PDT.dominates(VisitedPostDom, MBB))
+ NextLevel.push_back(MBB);
+
+ Visited[MBB] = Level;
+ VisitedDom = DT.findNearestCommonDominator(VisitedDom, MBB);
+
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ == DefBlock) {
+ if (MBB == VisitedPostDom)
+ FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
+ else
+ FoundLoopLevel = std::min(FoundLoopLevel, Level);
+ continue;
+ }
+
+ if (Visited.try_emplace(Succ, ~0u).second) {
+ if (MBB == VisitedPostDom)
+ NextLevel.push_back(Succ);
+ else
+ Stack.push_back(Succ);
+ }
+ }
+ }
+
+ CommonDominators.push_back(VisitedDom);
+ }
};
} // End anonymous namespace.
-INITIALIZE_PASS(SILowerI1Copies, DEBUG_TYPE,
- "SI Lower i1 Copies", false, false)
+INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE, "SI Lower i1 Copies", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE, "SI Lower i1 Copies", false,
+ false)
char SILowerI1Copies::ID = 0;
@@ -64,104 +411,415 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
return new SILowerI1Copies();
}
-bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
+static unsigned createLaneMaskReg(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
+ return MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+}
+
+static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ unsigned UndefReg = createLaneMaskReg(MF);
+ BuildMI(MBB, MBB.getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
+ UndefReg);
+ return UndefReg;
+}
- std::vector<unsigned> I1Defs;
+/// Lower all instructions that def or use vreg_1 registers.
+///
+/// In a first pass, we lower COPYs from vreg_1 to vector registers, as can
+/// occur around inline assembly. We do this first, before vreg_1 registers
+/// are changed to scalar mask registers.
+///
+/// Then we lower all defs of vreg_1 registers. Phi nodes are lowered before
+/// all others, because phi lowering looks through copies and can therefore
+/// often make copy lowering unnecessary.
+bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
+ MF = &TheMF;
+ MRI = &MF->getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ PDT = &getAnalysis<MachinePostDominatorTree>();
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
+ ST = &MF->getSubtarget<GCNSubtarget>();
+ TII = ST->getInstrInfo();
- MachineBasicBlock &MBB = *BI;
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
- MachineInstr &MI = *I;
+ lowerCopiesFromI1();
+ lowerPhis();
+ lowerCopiesToI1();
- if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) {
- unsigned Reg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- if (RC == &AMDGPU::VReg_1RegClass)
- MRI.setRegClass(Reg, &AMDGPU::SReg_64RegClass);
- continue;
- }
+ for (unsigned Reg : ConstrainRegs)
+ MRI->constrainRegClass(Reg, &AMDGPU::SReg_64_XEXECRegClass);
+ ConstrainRegs.clear();
+
+ return true;
+}
+void SILowerI1Copies::lowerCopiesFromI1() {
+ SmallVector<MachineInstr *, 4> DeadCopies;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
if (MI.getOpcode() != AMDGPU::COPY)
continue;
- const MachineOperand &Dst = MI.getOperand(0);
- const MachineOperand &Src = MI.getOperand(1);
-
- if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) ||
- !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
continue;
- const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
- const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());
+ if (isLaneMaskReg(DstReg) ||
+ (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
+ continue;
+ // Copy into a 32-bit vector register.
+ LLVM_DEBUG(dbgs() << "Lower copy from i1: " << MI);
DebugLoc DL = MI.getDebugLoc();
- MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
- if (DstRC == &AMDGPU::VReg_1RegClass &&
- TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
- I1Defs.push_back(Dst.getReg());
-
- if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
- if (DefInst->getOperand(1).isImm()) {
- I1Defs.push_back(Dst.getReg());
-
- int64_t Val = DefInst->getOperand(1).getImm();
- assert(Val == 0 || Val == -1);
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
- .add(Dst)
- .addImm(Val);
- MI.eraseFromParent();
- continue;
+
+ assert(TII->getRegisterInfo().getRegSizeInBits(DstReg, *MRI) == 32);
+ assert(!MI.getOperand(0).getSubReg());
+
+ ConstrainRegs.insert(SrcReg);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
+ .addImm(-1)
+ .addReg(SrcReg);
+ DeadCopies.push_back(&MI);
+ }
+
+ for (MachineInstr *MI : DeadCopies)
+ MI->eraseFromParent();
+ DeadCopies.clear();
+ }
+}
+
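+// Lower vreg_1 PHI nodes: the destination becomes a 64-bit lane mask, incoming
+// values are looked through COPYs, merged per incoming block with
+// buildMergeLaneMasks where the lane mask cannot be taken as-is (see
+// PhiIncomingAnalysis and LoopFinder), and uses are rewired through
+// MachineSSAUpdater.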
+void SILowerI1Copies::lowerPhis() {
+ MachineSSAUpdater SSAUpdater(*MF);
+ LoopFinder LF(*DT, *PDT);
+ PhiIncomingAnalysis PIA(*PDT);
+ SmallVector<MachineInstr *, 4> DeadPhis;
+ SmallVector<MachineBasicBlock *, 4> IncomingBlocks;
+ SmallVector<unsigned, 4> IncomingRegs;
+ SmallVector<unsigned, 4> IncomingUpdated;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ LF.initialize(MBB);
+
+ for (MachineInstr &MI : MBB.phis()) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
+
+ MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+
+ // Collect incoming values.
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ assert(i + 1 < MI.getNumOperands());
+ unsigned IncomingReg = MI.getOperand(i).getReg();
+ MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB();
+ MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
+
+ if (IncomingDef->getOpcode() == AMDGPU::COPY) {
+ IncomingReg = IncomingDef->getOperand(1).getReg();
+ assert(isLaneMaskReg(IncomingReg));
+ assert(!IncomingDef->getOperand(1).getSubReg());
+ } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ continue;
+ } else {
+ assert(IncomingDef->isPHI());
+ }
+
+ IncomingBlocks.push_back(IncomingMBB);
+ IncomingRegs.push_back(IncomingReg);
+ }
+
+ // Phis in a loop that are observed outside the loop receive a simple but
+ // conservatively correct treatment.
+ MachineBasicBlock *PostDomBound = &MBB;
+ for (MachineInstr &Use : MRI->use_instructions(DstReg)) {
+ PostDomBound =
+ PDT->findNearestCommonDominator(PostDomBound, Use.getParent());
+ }
+
+ unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
+
+ SSAUpdater.Initialize(DstReg);
+
+ if (FoundLoopLevel) {
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks);
+
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ IncomingUpdated.push_back(createLaneMaskReg(*MF));
+ SSAUpdater.AddAvailableValue(IncomingBlocks[i],
+ IncomingUpdated.back());
+ }
+
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ buildMergeLaneMasks(
+ IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
+ SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
+ }
+ } else {
+ // The phi is not observed from outside a loop. Use a more accurate
+ // lowering.
+ PIA.analyze(MBB, IncomingBlocks);
+
+ for (MachineBasicBlock *MBB : PIA.predecessors())
+ SSAUpdater.AddAvailableValue(MBB, insertUndefLaneMask(*MBB));
+
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ if (PIA.isSource(IMBB)) {
+ IncomingUpdated.push_back(0);
+ SSAUpdater.AddAvailableValue(&IMBB, IncomingRegs[i]);
+ } else {
+ IncomingUpdated.push_back(createLaneMaskReg(*MF));
+ SSAUpdater.AddAvailableValue(&IMBB, IncomingUpdated.back());
}
}
- unsigned int TmpSrc = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::COPY), TmpSrc)
- .add(Src);
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
- .add(Dst)
- .addImm(0)
- .addImm(-1)
- .addReg(TmpSrc);
- MI.eraseFromParent();
- } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
- SrcRC == &AMDGPU::VReg_1RegClass) {
- if (DefInst->getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
- DefInst->getOperand(1).isImm() && DefInst->getOperand(2).isImm() &&
- DefInst->getOperand(1).getImm() == 0 &&
- DefInst->getOperand(2).getImm() != 0 &&
- DefInst->getOperand(3).isReg() &&
- TargetRegisterInfo::isVirtualRegister(
- DefInst->getOperand(3).getReg()) &&
- TRI->getCommonSubClass(
- MRI.getRegClass(DefInst->getOperand(3).getReg()),
- &AMDGPU::SGPR_64RegClass) &&
- AMDGPU::laneDominates(DefInst->getParent(), &MBB)) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64))
- .add(Dst)
- .addReg(AMDGPU::EXEC)
- .add(DefInst->getOperand(3));
- } else {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64))
- .add(Dst)
- .add(Src)
- .addImm(0);
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ if (!IncomingUpdated[i])
+ continue;
+
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ buildMergeLaneMasks(
+ IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
+ SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
}
- MI.eraseFromParent();
+ }
+
+ unsigned NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
+ if (NewReg != DstReg) {
+ MRI->replaceRegWith(NewReg, DstReg);
+
+ // Ensure that DstReg has a single def and mark the old PHI node for
+ // deletion.
+ MI.getOperand(0).setReg(NewReg);
+ DeadPhis.push_back(&MI);
+ }
+
+ IncomingBlocks.clear();
+ IncomingRegs.clear();
+ IncomingUpdated.clear();
+ }
+
+ for (MachineInstr *MI : DeadPhis)
+ MI->eraseFromParent();
+ DeadPhis.clear();
+ }
+}
+
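+// Lower COPY and IMPLICIT_DEF defs of vreg_1 registers: the destination is
+// turned into a 64-bit lane mask, a 32-bit VGPR source is first converted to a
+// lane mask with V_CMP_NE_U32, and defs inside loops that are observed outside
+// the loop are merged across iterations with buildMergeLaneMasks.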
+void SILowerI1Copies::lowerCopiesToI1() {
+ MachineSSAUpdater SSAUpdater(*MF);
+ LoopFinder LF(*DT, *PDT);
+ SmallVector<MachineInstr *, 4> DeadCopies;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ LF.initialize(MBB);
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
+ MI.getOpcode() != AMDGPU::COPY)
+ continue;
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+ continue;
+
+ if (MRI->use_empty(DstReg)) {
+ DeadCopies.push_back(&MI);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Lower Other: " << MI);
+
+ MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+ if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
+ continue;
+
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ assert(!MI.getOperand(1).getSubReg());
+
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !isLaneMaskReg(SrcReg)) {
+ assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
+ unsigned TmpReg = createLaneMaskReg(*MF);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ MI.getOperand(1).setReg(TmpReg);
+ SrcReg = TmpReg;
+ }
+
+ // Defs in a loop that are observed outside the loop must be transformed
+ // into appropriate bit manipulation.
+ MachineBasicBlock *PostDomBound = &MBB;
+ for (MachineInstr &Use : MRI->use_instructions(DstReg)) {
+ PostDomBound =
+ PDT->findNearestCommonDominator(PostDomBound, Use.getParent());
+ }
+
+ unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
+ if (FoundLoopLevel) {
+ SSAUpdater.Initialize(DstReg);
+ SSAUpdater.AddAvailableValue(&MBB, DstReg);
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater);
+
+ buildMergeLaneMasks(MBB, MI, DL, DstReg,
+ SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg);
+ DeadCopies.push_back(&MI);
}
}
+
+ for (MachineInstr *MI : DeadCopies)
+ MI->eraseFromParent();
+ DeadCopies.clear();
}
+}
- for (unsigned Reg : I1Defs)
- MRI.setRegClass(Reg, &AMDGPU::VGPR_32RegClass);
+bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const {
+ const MachineInstr *MI;
+ for (;;) {
+ MI = MRI->getUniqueVRegDef(Reg);
+ if (MI->getOpcode() != AMDGPU::COPY)
+ break;
+
+ Reg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (!isLaneMaskReg(Reg))
+ return false;
+ }
+
+ if (MI->getOpcode() != AMDGPU::S_MOV_B64)
+ return false;
+
+ if (!MI->getOperand(1).isImm())
+ return false;
+
+ int64_t Imm = MI->getOperand(1).getImm();
+ if (Imm == 0) {
+ Val = false;
+ return true;
+ }
+ if (Imm == -1) {
+ Val = true;
+ return true;
+ }
return false;
}
+
+static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
+ Def = false;
+ Use = false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
+ if (MO.isUse())
+ Use = true;
+ else
+ Def = true;
+ }
+ }
+}
+
+/// Return a point at the end of the given \p MBB to insert SALU instructions
+/// for lane mask calculation. Take terminators and SCC into account.
+MachineBasicBlock::iterator
+SILowerI1Copies::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
+ auto InsertionPt = MBB.getFirstTerminator();
+ bool TerminatorsUseSCC = false;
+ for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
+ bool DefsSCC;
+ instrDefsUsesSCC(*I, DefsSCC, TerminatorsUseSCC);
+ if (TerminatorsUseSCC || DefsSCC)
+ break;
+ }
+
+ if (!TerminatorsUseSCC)
+ return InsertionPt;
+
+ while (InsertionPt != MBB.begin()) {
+ InsertionPt--;
+
+ bool DefSCC, UseSCC;
+ instrDefsUsesSCC(*InsertionPt, DefSCC, UseSCC);
+ if (DefSCC)
+ return InsertionPt;
+ }
+
+ // We should have at least seen an IMPLICIT_DEF or COPY
+ llvm_unreachable("SCC used by terminator but no def in block");
+}
+
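+// Merge two lane masks so that lanes active in EXEC take their value from
+// CurReg while inactive lanes keep the value from PrevReg, i.e.
+//   DstReg = (PrevReg & ~EXEC) | (CurReg & EXEC),
+// folding the ANDs and the OR when either input is a known all-zeros or
+// all-ones constant.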
+void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DstReg,
+ unsigned PrevReg, unsigned CurReg) {
+ bool PrevVal;
+ bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
+ bool CurVal;
+ bool CurConstant = isConstantLaneMask(CurReg, CurVal);
+
+ if (PrevConstant && CurConstant) {
+ if (PrevVal == CurVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
+ } else if (CurVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(AMDGPU::EXEC);
+ } else {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), DstReg)
+ .addReg(AMDGPU::EXEC)
+ .addImm(-1);
+ }
+ return;
+ }
+
+ unsigned PrevMaskedReg = 0;
+ unsigned CurMaskedReg = 0;
+ if (!PrevConstant) {
+ if (CurConstant && CurVal) {
+ PrevMaskedReg = PrevReg;
+ } else {
+ PrevMaskedReg = createLaneMaskReg(*MF);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ANDN2_B64), PrevMaskedReg)
+ .addReg(PrevReg)
+ .addReg(AMDGPU::EXEC);
+ }
+ }
+ if (!CurConstant) {
+ // TODO: check whether CurReg is already masked by EXEC
+ if (PrevConstant && PrevVal) {
+ CurMaskedReg = CurReg;
+ } else {
+ CurMaskedReg = createLaneMaskReg(*MF);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), CurMaskedReg)
+ .addReg(CurReg)
+ .addReg(AMDGPU::EXEC);
+ }
+ }
+
+ if (PrevConstant && !PrevVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
+ .addReg(CurMaskedReg);
+ } else if (CurConstant && !CurVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
+ .addReg(PrevMaskedReg);
+ } else if (PrevConstant && PrevVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ORN2_B64), DstReg)
+ .addReg(CurMaskedReg)
+ .addReg(AMDGPU::EXEC);
+ } else {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_OR_B64), DstReg)
+ .addReg(PrevMaskedReg)
+ .addReg(CurMaskedReg ? CurMaskedReg : (unsigned)AMDGPU::EXEC);
+ }
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 0d5ff75e37ed..181cc41bd5ff 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -117,7 +117,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- bool MaySpill = ST.isVGPRSpillingEnabled(F);
bool HasStackObjects = FrameInfo.hasStackObjects();
if (isEntryFunction()) {
@@ -126,21 +125,18 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (WorkItemIDZ)
WorkItemIDY = true;
- if (HasStackObjects || MaySpill) {
- PrivateSegmentWaveByteOffset = true;
+ PrivateSegmentWaveByteOffset = true;
// HS and GS always have the scratch wave offset in SGPR5 on GFX9.
if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
(CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
- ArgInfo.PrivateSegmentWaveByteOffset
- = ArgDescriptor::createRegister(AMDGPU::SGPR5);
- }
+ ArgInfo.PrivateSegmentWaveByteOffset =
+ ArgDescriptor::createRegister(AMDGPU::SGPR5);
}
- bool IsCOV2 = ST.isAmdCodeObjectV2(F);
- if (IsCOV2) {
- if (HasStackObjects || MaySpill)
- PrivateSegmentBuffer = true;
+ bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
+ if (isAmdHsaOrMesa) {
+ PrivateSegmentBuffer = true;
if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
DispatchPtr = true;
@@ -151,14 +147,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F.hasFnAttribute("amdgpu-dispatch-id"))
DispatchID = true;
} else if (ST.isMesaGfxShader(F)) {
- if (HasStackObjects || MaySpill)
- ImplicitBufferPtr = true;
+ ImplicitBufferPtr = true;
}
if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
KernargSegmentPtr = true;
- if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+ if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
// TODO: This could be refined a lot. The attribute is a poor way of
// detecting calls that may require it before argument lowering.
if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 18754442898f..fb7e670068fe 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -471,7 +471,7 @@ void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(DAG);
+ DAG->dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -611,13 +611,11 @@ void SIScheduleBlock::printDebug(bool full) {
dbgs() << "\nInstructions:\n";
if (!Scheduled) {
- for (SUnit* SU : SUnits) {
- SU->dump(DAG);
- }
+ for (const SUnit* SU : SUnits)
+ DAG->dumpNode(*SU);
} else {
- for (SUnit* SU : SUnits) {
- SU->dump(DAG);
- }
+ for (const SUnit* SU : SUnits)
+ DAG->dumpNode(*SU);
}
dbgs() << "///////////////////////\n";
@@ -1933,7 +1931,7 @@ void SIScheduleDAGMI::schedule()
LLVM_DEBUG(dbgs() << "Preparing Scheduling\n");
buildDAGWithRegPressure();
- LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
+ LLVM_DEBUG(dump());
topologicalSort();
findRootsAndBiasEdges(TopRoots, BotRoots);
@@ -1957,12 +1955,12 @@ void SIScheduleDAGMI::schedule()
for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
SUnit *SU = &SUnits[i];
- unsigned BaseLatReg;
+ MachineOperand *BaseLatOp;
int64_t OffLatReg;
if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
IsLowLatencySU[i] = 1;
- if (SITII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseLatReg, OffLatReg,
- TRI))
+ if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
+ TRI))
LowLatencyOffset[i] = OffLatReg;
} else if (SITII->isHighLatencyInstruction(*SU->getInstr()))
IsHighLatencySU[i] = 1;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 938cdaf1ef8f..b4a4e9e33133 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -202,8 +202,6 @@ public:
class SIMemOpAccess final {
private:
-
- AMDGPUAS SIAddrSpaceInfo;
AMDGPUMachineModuleInfo *MMI = nullptr;
/// Reports unsupported message \p Msg for \p MI to LLVM context.
@@ -255,7 +253,7 @@ protected:
/// Instruction info.
const SIInstrInfo *TII = nullptr;
- IsaInfo::IsaVersion IV;
+ IsaVersion IV;
SICacheControl(const GCNSubtarget &ST);
@@ -453,22 +451,21 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
}
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
- if (AS == SIAddrSpaceInfo.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
return SIAtomicAddrSpace::FLAT;
- if (AS == SIAddrSpaceInfo.GLOBAL_ADDRESS)
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS)
return SIAtomicAddrSpace::GLOBAL;
- if (AS == SIAddrSpaceInfo.LOCAL_ADDRESS)
+ if (AS == AMDGPUAS::LOCAL_ADDRESS)
return SIAtomicAddrSpace::LDS;
- if (AS == SIAddrSpaceInfo.PRIVATE_ADDRESS)
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS)
return SIAtomicAddrSpace::SCRATCH;
- if (AS == SIAddrSpaceInfo.REGION_ADDRESS)
+ if (AS == AMDGPUAS::REGION_ADDRESS)
return SIAtomicAddrSpace::GDS;
return SIAtomicAddrSpace::OTHER;
}
SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
- SIAddrSpaceInfo = getAMDGPUAS(MF.getTarget());
MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
}
@@ -608,7 +605,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
TII = ST.getInstrInfo();
- IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
+ IV = getIsaVersion(ST.getCPU());
}
/* static */
@@ -815,6 +812,12 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
+ const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
+
+ const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
+ ? AMDGPU::BUFFER_WBINVL1
+ : AMDGPU::BUFFER_WBINVL1_VOL;
+
if (Pos == Position::AFTER)
++MI;
@@ -822,7 +825,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL));
+ BuildMI(MBB, MI, DL, TII->get(Flush));
Changed = true;
break;
case SIAtomicScope::WORKGROUP:
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
new file mode 100644
index 000000000000..883fd308f2f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -0,0 +1,406 @@
+//===-- SIModeRegister.cpp - Mode Register --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass inserts changes to the Mode register settings as required.
+/// Note that currently it only deals with the Double Precision Floating Point
+/// rounding mode setting, but is intended to be generic enough to be easily
+/// expanded.
+///
+//===----------------------------------------------------------------------===//
+//
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
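+ // A lane mask is a scalar register whose bit width matches the wavefront
+ // size (e.g. an SReg_64 register on a wave64 target).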
+#include <queue>
+
+#define DEBUG_TYPE "si-mode-register"
+
+STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
+
+using namespace llvm;
+
+struct Status {
+ // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
+ // known value
+ unsigned Mask;
+ unsigned Mode;
+
+ Status() : Mask(0), Mode(0){};
+
+ Status(unsigned Mask, unsigned Mode) : Mask(Mask), Mode(Mode) {
+ Mode &= Mask;
+ };
+
+ // Merge two status values: bits known in S take precedence, and bits known
+ // in only one of the two values are preserved.
+ Status merge(const Status &S) const {
+ return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
+ }
+
+ // merge an unknown value by using the unknown value's mask to remove bits
+ // from the result
+ Status mergeUnknown(unsigned newMask) {
+ return Status(Mask & ~newMask, Mode & ~newMask);
+ }
+
+ // intersect two Status values to produce a mode and mask that is a subset
+ // of both values
+ Status intersect(const Status &S) const {
+ unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
+ unsigned NewMode = (Mode & NewMask);
+ return Status(NewMask, NewMode);
+ }
+
+ // produce the delta required to change the Mode to the required Mode
+ Status delta(const Status &S) const {
+ return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
+ }
+
+ bool operator==(const Status &S) const {
+ return (Mask == S.Mask) && (Mode == S.Mode);
+ }
+
+ bool operator!=(const Status &S) const { return !(*this == S); }
+
+ bool isCompatible(Status &S) {
+ return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
+ }
+
+ bool isCombinable(Status &S) {
+ return !(Mask & S.Mask) || isCompatible(S);
+ }
+};
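+
+// Worked example (hypothetical values, for illustration only): with
+// A = Status(0b1100, 0b0100) and B = Status(0b0110, 0b0110), A.merge(B) is
+// Status(0b1110, 0b0110) (B's known bits take precedence) and A.intersect(B)
+// is Status(0b0100, 0b0100) (only the bit on which both agree survives).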
+
+class BlockData {
+public:
+ // The Status that represents the mode register settings required by the
+ // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
+ Status Require;
+
+ // The Status that represents the net changes to the Mode register made by
+ // this block. Calculated in Phase 1.
+ Status Change;
+
+ // The Status that represents the mode register settings on exit from this
+ // block. Calculated in Phase 2.
+ Status Exit;
+
+ // The Status that represents the intersection of exit Mode register settings
+ // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
+ Status Pred;
+
+ // In Phase 1 we record the first instruction that has a mode requirement,
+ // which is used in Phase 3 if we need to insert a mode change.
+ MachineInstr *FirstInsertionPoint;
+
+ BlockData() : FirstInsertionPoint(nullptr) {};
+};
+
+namespace {
+
+class SIModeRegister : public MachineFunctionPass {
+public:
+ static char ID;
+
+ std::vector<std::unique_ptr<BlockData>> BlockInfo;
+ std::queue<MachineBasicBlock *> Phase2List;
+
+ // The default mode register setting currently only caters for the floating
+ // point double precision rounding mode.
+ // We currently assume the default rounding mode is Round to Nearest.
+ // NOTE: this should come from a per-function rounding mode setting once such
+ // a setting exists.
+ unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
+ Status DefaultStatus =
+ Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
+
+public:
+ SIModeRegister() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
+
+ void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
+
+ void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
+
+ Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
+
+ void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
+ const SIInstrInfo *TII, Status InstrMode);
+};
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
+ "Insert required mode register values", false, false)
+
+char SIModeRegister::ID = 0;
+
+char &llvm::SIModeRegisterID = SIModeRegister::ID;
+
+FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
+
+// Determine the Mode register setting required for this instruction.
+// Instructions which don't use the Mode register return a null Status.
+// Note this currently only deals with instructions that use the floating point
+// double precision setting.
+Status SIModeRegister::getInstructionMode(MachineInstr &MI,
+ const SIInstrInfo *TII) {
+ if (TII->usesFPDPRounding(MI)) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_INTERP_P1LL_F16:
+ case AMDGPU::V_INTERP_P1LV_F16:
+ case AMDGPU::V_INTERP_P2_F16:
+ // f16 interpolation instructions need double precision round to zero
+ return Status(FP_ROUND_MODE_DP(3),
+ FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
+ default:
+ return DefaultStatus;
+ }
+ }
+ return Status();
+}
+
+// Insert a setreg instruction to update the Mode register.
+// It is possible (though unlikely) for an instruction to require a change to
+// the value of disjoint parts of the Mode register when we don't know the
+// value of the intervening bits. In that case we need to use more than one
+// setreg instruction.
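+// For example (illustrative): a delta whose Mask is 0b101 is emitted as two
+// setregs, one for the single-bit run at offset 0 and one for the run at
+// offset 2, since each S_SETREG addresses one contiguous range of bits.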
+void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
+ const SIInstrInfo *TII, Status InstrMode) {
+ while (InstrMode.Mask) {
+ unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
+ unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
+ unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
+ BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32))
+ .addImm(Value)
+ .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
+ (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
+ (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
+ ++NumSetregInserted;
+ InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
+ }
+}
+
+// In Phase 1 we iterate through the instructions of the block and for each
+// instruction we get its mode usage. If the instruction uses the Mode register
+// we:
+// - update the Change status, which tracks the changes to the Mode register
+// made by this block
+// - if this instruction's requirements are compatible with the current setting
+// of the Mode register we merge the modes
+// - if it isn't compatible and an InsertionPoint isn't set, then we set the
+// InsertionPoint to the current instruction, and we remember the current
+// mode
+// - if it isn't compatible and InsertionPoint is set we insert a setreg before
+// that instruction (unless this instruction forms part of the block's
+// entry requirements in which case the insertion is deferred until Phase 3
+// when predecessor exit values are known), and move the insertion point to
+// this instruction
+// - if this is a setreg instruction we treat it as an incompatible instruction.
+// This is sub-optimal but avoids some nasty corner cases, and is expected to
+// occur very rarely.
+// - on exit we have set the Require, Change, and initial Exit modes.
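+//
+// For example (illustrative): if a block's first mode-using instruction is
+// V_INTERP_P2_F16 (which needs DP round-to-zero) and it is followed by an FP64
+// arithmetic instruction (which needs the default round-to-nearest), Phase 1
+// records Require = round-to-zero and defers that first setreg to Phase 3,
+// inserts a setreg restoring round-to-nearest immediately before the FP64
+// instruction, and leaves Exit = round-to-nearest.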
+void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
+ const SIInstrInfo *TII) {
+ auto NewInfo = llvm::make_unique<BlockData>();
+ MachineInstr *InsertionPoint = nullptr;
+ // RequirePending is used to indicate whether we are collecting the initial
+ // requirements for the block, and need to defer the first InsertionPoint to
+ // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
+// we discover an explicit setreg that means this block doesn't have any
+ // initial requirements.
+ bool RequirePending = true;
+ Status IPChange;
+ for (MachineInstr &MI : MBB) {
+ Status InstrMode = getInstructionMode(MI, TII);
+ if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) ||
+ (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) {
+ // We preserve any explicit mode register setreg instruction we encounter,
+ // as we assume it has been inserted by a higher authority (this is
+ // likely to be a very rare occurrence).
+ unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
+ if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
+ AMDGPU::Hwreg::ID_MODE)
+ continue;
+
+ unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
+ AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
+ 1;
+ unsigned Offset =
+ (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
+ unsigned Mask = ((1 << Width) - 1) << Offset;
+
+ // If an InsertionPoint is set we will insert a setreg there.
+ if (InsertionPoint) {
+ insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
+ InsertionPoint = nullptr;
+ }
+ // If this is an immediate then we know the value being set, but if it is
+ // not an immediate then we treat the modified bits of the mode register
+ // as unknown.
+ if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
+ unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
+ unsigned Mode = (Val << Offset) & Mask;
+ Status Setreg = Status(Mask, Mode);
+ // If we haven't already set the initial requirements for the block we
+ // don't need to as the requirements start from this explicit setreg.
+ RequirePending = false;
+ NewInfo->Change = NewInfo->Change.merge(Setreg);
+ } else {
+ NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
+ }
+ } else if (!NewInfo->Change.isCompatible(InstrMode)) {
+ // This instruction uses the Mode register and its requirements aren't
+ // compatible with the current mode.
+ if (InsertionPoint) {
+ // If the required mode change cannot be included in the current
+ // InsertionPoint changes, we need a setreg and start a new
+ // InsertionPoint.
+ if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
+ if (RequirePending) {
+ // This is the first InsertionPoint in the block so we will defer
+ // the insertion of the setreg to Phase 3 where we know whether or
+ // not it is actually needed.
+ NewInfo->FirstInsertionPoint = InsertionPoint;
+ NewInfo->Require = NewInfo->Change;
+ RequirePending = false;
+ } else {
+ insertSetreg(MBB, InsertionPoint, TII,
+ IPChange.delta(NewInfo->Change));
+ IPChange = NewInfo->Change;
+ }
+ // Set the new InsertionPoint
+ InsertionPoint = &MI;
+ }
+ NewInfo->Change = NewInfo->Change.merge(InstrMode);
+ } else {
+ // No InsertionPoint is currently set - this is either the first in
+ // the block or we have previously seen an explicit setreg.
+ InsertionPoint = &MI;
+ IPChange = NewInfo->Change;
+ NewInfo->Change = NewInfo->Change.merge(InstrMode);
+ }
+ }
+ }
+ if (RequirePending) {
+ // If we haven't yet set the initial requirements for the block we set them
+ // now.
+ NewInfo->FirstInsertionPoint = InsertionPoint;
+ NewInfo->Require = NewInfo->Change;
+ } else if (InsertionPoint) {
+ // We need to insert a setreg at the InsertionPoint
+ insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
+ }
+ NewInfo->Exit = NewInfo->Change;
+ BlockInfo[MBB.getNumber()] = std::move(NewInfo);
+}
+
+// In Phase 2 we revisit each block and calculate the common Mode register
+// value provided by all predecessor blocks. If the Exit value for the block
+// is changed, then we add the successor blocks to the worklist so that the
+// exit value is propagated.
+void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
+ const SIInstrInfo *TII) {
+// BlockData *BI = BlockInfo[MBB.getNumber()];
+ unsigned ThisBlock = MBB.getNumber();
+ if (MBB.pred_empty()) {
+ // There are no predecessors, so use the default starting status.
+ BlockInfo[ThisBlock]->Pred = DefaultStatus;
+ } else {
+ // Build a status that is common to all the predecessors by intersecting
+ // all the predecessor exit status values.
+ MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
+ MachineBasicBlock &PB = *(*P);
+ BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
+
+ for (P = std::next(P); P != E; P = std::next(P)) {
+ MachineBasicBlock *Pred = *P;
+ BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit);
+ }
+ }
+ Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
+ if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
+ BlockInfo[ThisBlock]->Exit = TmpStatus;
+ // Add the successors to the work list so we can propagate the changed exit
+ // status.
+ for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
+ E = MBB.succ_end();
+ S != E; S = std::next(S)) {
+ MachineBasicBlock &B = *(*S);
+ Phase2List.push(&B);
+ }
+ }
+}
+
+// In Phase 3 we revisit each block and if it has an insertion point defined we
+// check whether the predecessor mode meets the block's entry requirements. If
+// not we insert an appropriate setreg instruction to modify the Mode register.
+void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
+ const SIInstrInfo *TII) {
+// BlockData *BI = BlockInfo[MBB.getNumber()];
+ unsigned ThisBlock = MBB.getNumber();
+ if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
+ Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
+ if (BlockInfo[ThisBlock]->FirstInsertionPoint)
+ insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
+ else
+ insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
+ }
+}
+
+bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
+ BlockInfo.resize(MF.getNumBlockIDs());
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ // Processing is performed in a number of phases
+
+ // Phase 1 - determine the initial mode required by each block, and add setreg
+ // instructions for intra-block requirements.
+ for (MachineBasicBlock &BB : MF)
+ processBlockPhase1(BB, TII);
+
+ // Phase 2 - determine the exit mode from each block. We add all blocks to the
+ // list here, but will also add any that need to be revisited during Phase 2
+ // processing.
+ for (MachineBasicBlock &BB : MF)
+ Phase2List.push(&BB);
+ while (!Phase2List.empty()) {
+ processBlockPhase2(*Phase2List.front(), TII);
+ Phase2List.pop();
+ }
+
+ // Phase 3 - add an initial setreg to each block where the required entry mode
+ // is not satisfied by the exit mode of all its predecessors.
+ for (MachineBasicBlock &BB : MF)
+ processBlockPhase3(BB, TII);
+
+ BlockInfo.clear();
+
+ return NumSetregInserted > 0;
+}
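The three phases above amount to a small forward dataflow problem over the CFG. As a rough standalone sketch of Phase 2 only (not the LLVM code: Status, intersect and merge here are simplified stand-ins for the pass's mask/value pair, and the CFG is hard-coded), the fixed-point propagation looks like this:

    // Sketch of the Phase 2 worklist propagation: a block's entry status is
    // the intersection of its predecessors' exit statuses, its exit status is
    // that entry merged with the block's own change, and any block whose exit
    // changes re-queues its successors.
    #include <cassert>
    #include <cstdint>
    #include <queue>
    #include <vector>

    struct Status {
      uint32_t Mask = 0; // which mode bits are known
      uint32_t Mode = 0; // the known values of those bits
      bool operator==(const Status &O) const {
        return Mask == O.Mask && Mode == O.Mode;
      }
      // Keep only bits known in both and agreeing in value.
      Status intersect(const Status &O) const {
        uint32_t M = Mask & O.Mask & ~(Mode ^ O.Mode);
        return {M, Mode & M};
      }
      // Bits known in O override ours.
      Status merge(const Status &O) const {
        return {Mask | O.Mask, (Mode & ~O.Mask) | O.Mode};
      }
    };

    int main() {
      // Tiny CFG: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3, 3 -> 1 (a loop).
      std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {1}};
      std::vector<std::vector<int>> Preds = {{}, {0, 3}, {0}, {1, 2}};
      // Per-block "Change", i.e. what Phase 1 determined the block itself sets.
      std::vector<Status> Change = {{0x3, 0x1}, {0, 0}, {0x3, 0x2}, {0x1, 0x1}};
      std::vector<Status> Pred(4), Exit(4);
      Status DefaultStatus{0x3, 0x0}; // assumed hardware default

      std::queue<int> Work;
      for (int B = 0; B < 4; ++B)
        Work.push(B);
      while (!Work.empty()) {
        int B = Work.front();
        Work.pop();
        Status In = DefaultStatus;
        for (size_t I = 0; I < Preds[B].size(); ++I)
          In = I ? In.intersect(Exit[Preds[B][I]]) : Exit[Preds[B][I]];
        Pred[B] = In;
        Status NewExit = In.merge(Change[B]);
        if (!(NewExit == Exit[B])) {
          Exit[B] = NewExit;
          for (int S : Succs[B])
            Work.push(S); // re-visit successors until a fixed point is reached
        }
      }

      assert(Exit[1].Mask == 0x1 && Exit[1].Mode == 0x1);
      assert(Exit[2].Mask == 0x3 && Exit[2].Mode == 0x2);
      // Block 3's predecessors disagree, so nothing is known on its entry.
      assert(Pred[3].Mask == 0);
      return 0;
    }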
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 7b678d12ba81..c671fed34bdf 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -103,6 +103,122 @@ static MachineInstr* getOrExecSource(const MachineInstr &MI,
return SaveExecInst;
}
+// Optimize sequence
+// %sel = V_CNDMASK_B32_e64 0, 1, %cc
+// %cmp = V_CMP_NE_U32 1, %1
+// $vcc = S_AND_B64 $exec, %cmp
+// S_CBRANCH_VCC[N]Z
+// =>
+// $vcc = S_ANDN2_B64 $exec, %cc
+// S_CBRANCH_VCC[N]Z
+//
+// This is the negation pattern inserted by DAGCombiner::visitBRCOND() in
+// rebuildSetCC(). We start from the S_CBRANCH to avoid an exhaustive search,
+// but only the first three instructions are really needed. S_AND_B64 with
+// exec is a required part of the pattern since V_CNDMASK_B32 writes zeroes
+// for inactive lanes.
+//
+// Returns %cc register on success.
+static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
+ const GCNSubtarget &ST,
+ MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) {
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const unsigned AndOpc = AMDGPU::S_AND_B64;
+ const unsigned Andn2Opc = AMDGPU::S_ANDN2_B64;
+ const unsigned CondReg = AMDGPU::VCC;
+ const unsigned ExecReg = AMDGPU::EXEC;
+
+ auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return Opc == AMDGPU::S_CBRANCH_VCCZ ||
+ Opc == AMDGPU::S_CBRANCH_VCCNZ; });
+ if (I == MBB.terminators().end())
+ return AMDGPU::NoRegister;
+
+ auto *And = TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister,
+ *I, MRI, LIS);
+ if (!And || And->getOpcode() != AndOpc ||
+ !And->getOperand(1).isReg() || !And->getOperand(2).isReg())
+ return AMDGPU::NoRegister;
+
+ MachineOperand *AndCC = &And->getOperand(1);
+ unsigned CmpReg = AndCC->getReg();
+ unsigned CmpSubReg = AndCC->getSubReg();
+ if (CmpReg == ExecReg) {
+ AndCC = &And->getOperand(2);
+ CmpReg = AndCC->getReg();
+ CmpSubReg = AndCC->getSubReg();
+ } else if (And->getOperand(2).getReg() != ExecReg) {
+ return AMDGPU::NoRegister;
+ }
+
+ auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, MRI, LIS);
+ if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
+ Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
+ Cmp->getParent() != And->getParent())
+ return AMDGPU::NoRegister;
+
+ MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
+ MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
+ if (Op1->isImm() && Op2->isReg())
+ std::swap(Op1, Op2);
+ if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
+ return AMDGPU::NoRegister;
+
+ unsigned SelReg = Op1->getReg();
+ auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, MRI, LIS);
+ if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+ return AMDGPU::NoRegister;
+
+ Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
+ Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
+ MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
+ if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
+ Op1->getImm() != 0 || Op2->getImm() != 1)
+ return AMDGPU::NoRegister;
+
+ LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t'
+ << *Cmp << '\t' << *And);
+
+ unsigned CCReg = CC->getReg();
+ LIS->RemoveMachineInstrFromMaps(*And);
+ MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
+ TII->get(Andn2Opc), And->getOperand(0).getReg())
+ .addReg(ExecReg)
+ .addReg(CCReg, CC->getSubReg());
+ And->eraseFromParent();
+ LIS->InsertMachineInstrInMaps(*Andn2);
+
+ LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
+
+  // Try to remove the compare. The Cmp value must not be used between the
+  // cmp and the s_and_b64 if it is VCC, and must simply be unused if it is
+  // any other register.
+ if ((TargetRegisterInfo::isVirtualRegister(CmpReg) &&
+ MRI.use_nodbg_empty(CmpReg)) ||
+ (CmpReg == CondReg &&
+ std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
+ [&](const MachineInstr &MI) {
+ return MI.readsRegister(CondReg, TRI); }))) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
+
+ LIS->RemoveMachineInstrFromMaps(*Cmp);
+ Cmp->eraseFromParent();
+
+ // Try to remove v_cndmask_b32.
+ if (TargetRegisterInfo::isVirtualRegister(SelReg) &&
+ MRI.use_nodbg_empty(SelReg)) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+
+ LIS->RemoveMachineInstrFromMaps(*Sel);
+ Sel->eraseFromParent();
+ }
+ }
+
+ return CCReg;
+}
+
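The rewrite is sound lane by lane: V_CNDMASK_B32 writes cc ? 1 : 0 in active lanes and 0 in inactive ones, so exec & (sel != 1) collapses to exec & ~cc. A small standalone check of that identity, with the lane masks modelled as plain 64-bit integers rather than the MIR operands used above:

    // Lane-wise check of the fold performed by optimizeVcndVcmpPair:
    //   sel = cc ? 1 : 0          (V_CNDMASK_B32; 0 in inactive lanes)
    //   cmp = (sel != 1)          (V_CMP_NE_U32 1, sel)
    //   vcc = exec & cmp          (S_AND_B64)
    // is equivalent to
    //   vcc = exec & ~cc          (S_ANDN2_B64)
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint64_t Exec : {0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFF0000ULL}) {
        for (uint64_t CC : {0x0ULL, 0x123456789ABCDEF0ULL, ~0x0ULL}) {
          uint64_t VccOld = 0, VccNew = 0;
          for (int Lane = 0; Lane < 64; ++Lane) {
            uint64_t Bit = 1ULL << Lane;
            bool Active = Exec & Bit;
            // V_CNDMASK writes 0 to inactive lanes, cc ? 1 : 0 to active ones.
            uint32_t Sel = Active ? ((CC & Bit) ? 1u : 0u) : 0u;
            bool Cmp = (Sel != 1u); // V_CMP_NE_U32 1, sel
            if (Active && Cmp)
              VccOld |= Bit; // exec & cmp
            if (Active && !(CC & Bit))
              VccNew |= Bit; // exec & ~cc
          }
          assert(VccOld == VccNew);
        }
      }
      return 0;
    }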
bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -117,9 +233,24 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
+ if (unsigned Reg = optimizeVcndVcmpPair(MBB, ST, MRI, LIS)) {
+ RecalcRegs.insert(Reg);
+ RecalcRegs.insert(AMDGPU::VCC_LO);
+ RecalcRegs.insert(AMDGPU::VCC_HI);
+ RecalcRegs.insert(AMDGPU::SCC);
+ Changed = true;
+ }
+
// Try to remove unneeded instructions before s_endpgm.
if (MBB.succ_empty()) {
- if (MBB.empty() || MBB.back().getOpcode() != AMDGPU::S_ENDPGM)
+ if (MBB.empty())
+ continue;
+
+ // Skip this if the endpgm has any implicit uses, otherwise we would need
+ // to be careful to update / remove them.
+ MachineInstr &Term = MBB.back();
+ if (Term.getOpcode() != AMDGPU::S_ENDPGM ||
+ Term.getNumOperands() != 0)
continue;
SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 0e000b72962e..2d43d5d05ef6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -90,7 +90,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void matchSDWAOperands(MachineBasicBlock &MBB);
std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
- bool isConvertibleToSDWA(const MachineInstr &MI, const GCNSubtarget &ST) const;
+ bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST) const;
+ void pseudoOpConvertToVOP2(MachineInstr &MI,
+ const GCNSubtarget &ST) const;
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
@@ -854,7 +856,82 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
}
}
-bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
+// Convert V_ADDC_U32_e64 into V_ADDC_U32_e32 and
+// V_ADD_I32_e64 into V_ADD_I32_e32. This allows isConvertibleToSDWA
+// to then transform the V_ADD_I32_e32 into V_ADD_I32_sdwa.
+//
+// We are transforming from a VOP3 into a VOP2 form of the instruction.
+// %19:vgpr_32 = V_AND_B32_e32 255,
+// killed %16:vgpr_32, implicit $exec
+// %47:vgpr_32, %49:sreg_64_xexec = V_ADD_I32_e64
+// %26.sub0:vreg_64, %19:vgpr_32, implicit $exec
+// %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
+// %26.sub1:vreg_64, %54:vgpr_32, killed %49:sreg_64_xexec, implicit $exec
+//
+// becomes
+// %47:vgpr_32 = V_ADD_I32_sdwa
+// 0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0,
+// implicit-def $vcc, implicit $exec
+// %48:vgpr_32 = V_ADDC_U32_e32
+// 0, %26.sub1:vreg_64, implicit-def $vcc, implicit $vcc, implicit $exec
+void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
+ const GCNSubtarget &ST) const {
+ int Opc = MI.getOpcode();
+ assert((Opc == AMDGPU::V_ADD_I32_e64 || Opc == AMDGPU::V_SUB_I32_e64) &&
+ "Currently only handles V_ADD_I32_e64 or V_SUB_I32_e64");
+
+ // Can the candidate MI be shrunk?
+ if (!TII->canShrink(MI, *MRI))
+ return;
+ Opc = AMDGPU::getVOPe32(Opc);
+  // Find the related ADDC/SUBB instruction that consumes the carry out.
+ const MachineOperand *Sdst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ if (!Sdst)
+ return;
+ MachineOperand *NextOp = findSingleRegUse(Sdst, MRI);
+ if (!NextOp)
+ return;
+ MachineInstr &MISucc = *NextOp->getParent();
+ // Can the successor be shrunk?
+ if (!TII->canShrink(MISucc, *MRI))
+ return;
+ int SuccOpc = AMDGPU::getVOPe32(MISucc.getOpcode());
+ // Make sure the carry in/out are subsequently unused.
+ MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2);
+ if (!CarryIn)
+ return;
+ MachineOperand *CarryOut = TII->getNamedOperand(MISucc, AMDGPU::OpName::sdst);
+ if (!CarryOut)
+ return;
+ if (!MRI->hasOneUse(CarryIn->getReg()) || !MRI->use_empty(CarryOut->getReg()))
+ return;
+ // Make sure VCC or its subregs are dead before MI.
+ MachineBasicBlock &MBB = *MI.getParent();
+ auto Liveness = MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
+ if (Liveness != MachineBasicBlock::LQR_Dead)
+ return;
+  // Make sure VCC is not clobbered between MI and MISucc.
+ for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();
+ I != E; ++I) {
+ if (I->modifiesRegister(AMDGPU::VCC, TRI))
+ return;
+ }
+ // Make the two new e32 instruction variants.
+ // Replace MI with V_{SUB|ADD}_I32_e32
+ auto NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc));
+ NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst));
+ NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+ NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1));
+ MI.eraseFromParent();
+ // Replace MISucc with V_{SUBB|ADDC}_U32_e32
+ auto NewInst = BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc));
+ NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst));
+ NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0));
+ NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1));
+ MISucc.eraseFromParent();
+}
+
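For context, the e64/e32 pair being rewritten here is the usual split of a 64-bit add into a low add that produces a carry and a high add that consumes it. A standalone sketch of that arithmetic (the helper name is illustrative, not an LLVM API):

    // Illustration of the add/addc pair that pseudoOpConvertToVOP2 rewrites:
    // a 64-bit add split into a low 32-bit add that produces a carry
    // (V_ADD_I32) and a high 32-bit add that consumes it (V_ADDC_U32).
    #include <cassert>
    #include <cstdint>

    static uint64_t add64ViaCarryPair(uint64_t A, uint64_t B) {
      uint32_t ALo = static_cast<uint32_t>(A), AHi = static_cast<uint32_t>(A >> 32);
      uint32_t BLo = static_cast<uint32_t>(B), BHi = static_cast<uint32_t>(B >> 32);
      uint32_t Lo = ALo + BLo;           // V_ADD_I32: low halves
      uint32_t Carry = Lo < ALo ? 1 : 0; // carry-out (the sdst/VCC operand)
      uint32_t Hi = AHi + BHi + Carry;   // V_ADDC_U32: high halves plus carry-in
      return (static_cast<uint64_t>(Hi) << 32) | Lo;
    }

    int main() {
      assert(add64ViaCarryPair(0xFFFFFFFFULL, 1) == 0x100000000ULL);
      assert(add64ViaCarryPair(0x1234567890ABCDEFULL, 0x0FEDCBA987654321ULL) ==
             0x1234567890ABCDEFULL + 0x0FEDCBA987654321ULL);
      return 0;
    }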
+bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
const GCNSubtarget &ST) const {
// Check if this is already an SDWA instruction
unsigned Opc = MI.getOpcode();
@@ -1127,6 +1204,22 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
bool Changed = false;
do {
+      // Preprocess the ADD/SUB pairs so they can be SDWA'ed.
+ // Look for a possible ADD or SUB that resulted from a previously lowered
+ // V_{ADD|SUB}_U64_PSEUDO. The function pseudoOpConvertToVOP2
+ // lowers the pair of instructions into e32 form.
+ matchSDWAOperands(MBB);
+ for (const auto &OperandPair : SDWAOperands) {
+ const auto &Operand = OperandPair.second;
+ MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
+ if (PotentialMI &&
+ (PotentialMI->getOpcode() == AMDGPU::V_ADD_I32_e64 ||
+ PotentialMI->getOpcode() == AMDGPU::V_SUB_I32_e64))
+ pseudoOpConvertToVOP2(*PotentialMI, ST);
+ }
+ SDWAOperands.clear();
+
+ // Generate potential match list.
matchSDWAOperands(MBB);
for (const auto &OperandPair : SDWAOperands) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 624607f6ea54..97cfde2b2354 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -18,9 +18,12 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -495,15 +498,16 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
return false;
const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
- MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .add(*Reg)
- .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
- .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MachineInstrBuilder NewMI =
+ BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+ .add(*Reg)
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .cloneMemRefs(*MI);
const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata_in);
@@ -900,7 +904,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
.addImm(0) // glc
.addMemOperand(MMO);
- if (NumSubRegs > 1)
+ if (NumSubRegs > 1 && i == 0)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
continue;
@@ -914,7 +918,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
.addReg(Spill.VGPR)
.addImm(Spill.Lane);
- if (NumSubRegs > 1)
+ if (NumSubRegs > 1 && i == 0)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
} else {
if (OnlyToVGPR)
@@ -1598,3 +1602,57 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
llvm_unreachable("not implemented");
}
}
+
+// Find reaching register definition
+MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
+ MachineInstr &Use,
+ MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) const {
+ auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
+ SlotIndex UseIdx = LIS->getInstructionIndex(Use);
+ SlotIndex DefIdx;
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!LIS->hasInterval(Reg))
+ return nullptr;
+ LiveInterval &LI = LIS->getInterval(Reg);
+ LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ VNInfo *V = nullptr;
+ if (LI.hasSubRanges()) {
+ for (auto &S : LI.subranges()) {
+ if ((S.LaneMask & SubLanes) == SubLanes) {
+ V = S.getVNInfoAt(UseIdx);
+ break;
+ }
+ }
+ } else {
+ V = LI.getVNInfoAt(UseIdx);
+ }
+ if (!V)
+ return nullptr;
+ DefIdx = V->def;
+ } else {
+ // Find last def.
+ for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
+ LiveRange &LR = LIS->getRegUnit(*Units);
+ if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
+ if (!DefIdx.isValid() ||
+ MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
+ LIS->getInstructionFromIndex(V->def)))
+ DefIdx = V->def;
+ } else {
+ return nullptr;
+ }
+ }
+ }
+
+ MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
+
+ if (!Def || !MDT.dominates(Def, &Use))
+ return nullptr;
+
+ assert(Def->modifiesRegister(Reg, this));
+
+ return Def;
+}
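Within a single block this query reduces to "walk backwards from the use to the nearest def"; the LiveIntervals/MachineDominatorTree machinery above generalizes that across blocks and subregisters. A minimal sketch of the single-block case (the Inst type and register numbering are invented for the example):

    // Sketch of a reaching-definition query over straight-line code: the
    // nearest earlier instruction that defines the register reaches the use.
    // SIRegisterInfo::findReachingDef answers the same question using
    // LiveIntervals value numbers plus the dominator tree.
    #include <cassert>
    #include <string>
    #include <vector>

    struct Inst {
      std::string Name;
      int Def;               // register defined, or -1 if none
      std::vector<int> Uses; // registers read
    };

    static const Inst *reachingDefInBlock(const std::vector<Inst> &Block,
                                          unsigned UseIdx, int Reg) {
      for (int I = static_cast<int>(UseIdx) - 1; I >= 0; --I)
        if (Block[I].Def == Reg)
          return &Block[I];
      return nullptr; // defined outside the block (or not at all)
    }

    int main() {
      std::vector<Inst> Block = {
          {"v_cndmask_b32 %sel, 0, 1, %cc", 1, {0}},
          {"v_cmp_ne_u32 %cmp, 1, %sel", 2, {1}},
          {"s_and_b64 $vcc, $exec, %cmp", 3, {2}},
          {"s_cbranch_vccnz", -1, {3}},
      };
      const Inst *D = reachingDefInBlock(Block, 3, 3);
      assert(D && D->Name.substr(0, 9) == "s_and_b64");
      return 0;
    }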
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 5a51b67ca719..b82fefde47e1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -228,6 +228,12 @@ public:
getConstrainedRegClassForOperand(const MachineOperand &MO,
const MachineRegisterInfo &MRI) const override;
+ // Find reaching register definition
+ MachineInstr *findReachingDef(unsigned Reg, unsigned SubReg,
+ MachineInstr &Use,
+ MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) const;
+
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index f87a0763b353..c625ecc9b750 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -435,7 +435,7 @@ def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
let AllocationPriority = 7;
}
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
let CopyCost = 1;
let AllocationPriority = 8;
}
@@ -444,13 +444,13 @@ def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add
let isAllocatable = 0;
}
-def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
+def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
let AllocationPriority = 8;
}
-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
+def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SReg_64_XEXEC, EXEC)> {
let CopyCost = 1;
let AllocationPriority = 8;
@@ -459,15 +459,15 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
// Requires 2 s_mov_b64 to copy
let CopyCost = 2 in {
-def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)> {
+def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add SGPR_128Regs)> {
let AllocationPriority = 10;
}
-def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
+def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add TTMP_128Regs)> {
let isAllocatable = 0;
}
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64, v2f64], 32,
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
(add SGPR_128, TTMP_128)> {
let AllocationPriority = 10;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 4189bcce52ea..6ad7dd0e3a7c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -64,59 +64,6 @@ FunctionPass *llvm::createSIShrinkInstructionsPass() {
return new SIShrinkInstructions();
}
-static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
- const SIRegisterInfo &TRI,
- const MachineRegisterInfo &MRI) {
-
- const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- // Can't shrink instruction with three operands.
- // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
- // a special case for it. It can only be shrunk if the third operand
- // is vcc. We should handle this the same way we handle vopc, by addding
- // a register allocation hint pre-regalloc and then do the shrinking
- // post-regalloc.
- if (Src2) {
- switch (MI.getOpcode()) {
- default: return false;
-
- case AMDGPU::V_ADDC_U32_e64:
- case AMDGPU::V_SUBB_U32_e64:
- case AMDGPU::V_SUBBREV_U32_e64: {
- const MachineOperand *Src1
- = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (!Src1->isReg() || !TRI.isVGPR(MRI, Src1->getReg()))
- return false;
- // Additional verification is needed for sdst/src2.
- return true;
- }
- case AMDGPU::V_MAC_F32_e64:
- case AMDGPU::V_MAC_F16_e64:
- case AMDGPU::V_FMAC_F32_e64:
- if (!Src2->isReg() || !TRI.isVGPR(MRI, Src2->getReg()) ||
- TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
- return false;
- break;
-
- case AMDGPU::V_CNDMASK_B32_e64:
- break;
- }
- }
-
- const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Src1 && (!Src1->isReg() || !TRI.isVGPR(MRI, Src1->getReg()) ||
- TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
- return false;
-
- // We don't need to check src0, all input types are legal, so just make sure
- // src0 isn't using any modifiers.
- if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
- return false;
-
- // Check output modifiers
- return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
- !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
-}
-
/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions.
@@ -173,19 +120,6 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
return false;
}
-// Copy MachineOperand with all flags except setting it as implicit.
-static void copyFlagsToImplicitVCC(MachineInstr &MI,
- const MachineOperand &Orig) {
-
- for (MachineOperand &Use : MI.implicit_operands()) {
- if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
- Use.setIsUndef(Orig.isUndef());
- Use.setIsKill(Orig.isKill());
- return;
- }
- }
-}
-
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
return isInt<16>(Src.getImm()) &&
!TII->isInlineConstant(*Src.getParent(),
@@ -278,6 +212,245 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
}
}
+/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
+/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
+/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
+/// XNOR (as a ^ b == ~(a ^ ~b)).
+/// \returns true if the caller should continue the machine function iterator
+static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
+ MachineRegisterInfo &MRI,
+ const SIInstrInfo *TII,
+ MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ const MachineOperand *Dest = &MI.getOperand(0);
+ MachineOperand *Src0 = &MI.getOperand(1);
+ MachineOperand *Src1 = &MI.getOperand(2);
+ MachineOperand *SrcReg = Src0;
+ MachineOperand *SrcImm = Src1;
+
+ if (SrcImm->isImm() &&
+ !AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm())) {
+ uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
+ uint32_t NewImm = 0;
+
+ if (Opc == AMDGPU::S_AND_B32) {
+ if (isPowerOf2_32(~Imm)) {
+ NewImm = countTrailingOnes(Imm);
+ Opc = AMDGPU::S_BITSET0_B32;
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_ANDN2_B32;
+ }
+ } else if (Opc == AMDGPU::S_OR_B32) {
+ if (isPowerOf2_32(Imm)) {
+ NewImm = countTrailingZeros(Imm);
+ Opc = AMDGPU::S_BITSET1_B32;
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_ORN2_B32;
+ }
+ } else if (Opc == AMDGPU::S_XOR_B32) {
+ if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_XNOR_B32;
+ }
+ } else {
+ llvm_unreachable("unexpected opcode");
+ }
+
+ if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
+ SrcImm == Src0) {
+ if (!TII->commuteInstruction(MI, false, 1, 2))
+ NewImm = 0;
+ }
+
+ if (NewImm != 0) {
+ if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
+ SrcReg->isReg()) {
+ MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
+ MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
+ return true;
+ }
+
+ if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
+ MI.setDesc(TII->get(Opc));
+ if (Opc == AMDGPU::S_BITSET0_B32 ||
+ Opc == AMDGPU::S_BITSET1_B32) {
+ Src0->ChangeToImmediate(NewImm);
+ MI.RemoveOperand(2);
+ } else {
+ SrcImm->setImm(NewImm);
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
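The immediate rewrites above rely on a few plain bit identities: clearing one bit when ~Imm is a power of two (S_BITSET0), setting one bit when Imm is a power of two (S_BITSET1), using the complemented immediate for the "n2" forms, and the XNOR identity for XOR. A standalone check of those identities; the inlinability test is omitted, and the GCC/Clang builtin __builtin_ctz stands in for countTrailingZeros/Ones:

    // Value-level check of the rewrites performed by shrinkScalarLogicOp.
    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0xDEADBEEF;

      // AND with an immediate whose complement is a power of two clears one
      // bit: S_AND_B32 x, ~(1 << n)  ==  S_BITSET0_B32 x, n.
      uint32_t AndImm = ~(1u << 13);         // ~Imm is a power of two
      unsigned Bit = __builtin_ctz(~AndImm); // == countTrailingOnes(AndImm)
      assert((X & AndImm) == (X & ~(1u << Bit)));

      // OR with a power-of-two immediate sets one bit:
      // S_OR_B32 x, (1 << n)  ==  S_BITSET1_B32 x, n.
      uint32_t OrImm = 1u << 21;
      assert((X | OrImm) == (X | (1u << __builtin_ctz(OrImm))));

      // If ~Imm fits an inline constant, AND/OR can use the "n2" forms:
      // x & Imm == x & ~(~Imm)   (S_ANDN2_B32 x, ~Imm)
      // x | Imm == x | ~(~Imm)   (S_ORN2_B32  x, ~Imm)
      uint32_t Imm = 0xFFFFFFC0; // ~Imm == 0x3F, a small literal
      assert((X & Imm) == (X & ~(~Imm)));
      assert((X | Imm) == (X | ~(~Imm)));

      // XOR uses the XNOR identity a ^ b == ~(a ^ ~b).
      assert((X ^ Imm) == ~(X ^ ~Imm));
      return 0;
    }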
+// This is the same as MachineInstr::readsRegister/modifiesRegister except
+// it takes subregs into account.
+static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ for (const MachineOperand &MO : R) {
+ if (!MO.isReg())
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (TRI.regsOverlap(Reg, MO.getReg()))
+ return true;
+ } else if (MO.getReg() == Reg &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
+ TRI.getSubRegIndexLaneMask(MO.getSubReg());
+ if (Overlap.any())
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool instReadsReg(const MachineInstr *MI,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ return instAccessReg(MI->uses(), Reg, SubReg, TRI);
+}
+
+static bool instModifiesReg(const MachineInstr *MI,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ return instAccessReg(MI->defs(), Reg, SubReg, TRI);
+}
+
+static TargetInstrInfo::RegSubRegPair
+getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
+ const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
+ if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
+ } else {
+ LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
+ Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger()));
+ }
+ }
+ return TargetInstrInfo::RegSubRegPair(Reg, Sub);
+}
+
+// Match:
+// mov t, x
+// mov x, y
+// mov y, t
+//
+// =>
+//
+// mov t, x (t is potentially dead and move eliminated)
+// v_swap_b32 x, y
+//
+// Returns a pointer to the next valid instruction if it was able to create a
+// v_swap_b32.
+//
+// This should not be done too early, so as not to prevent possible folding
+// that may remove the matched moves; it should preferably be done before RA
+// to release the saved registers, and possibly also after RA, which can
+// insert copies too.
+//
+// This is really just a generic peephole rather than a canonical shrinking,
+// although the requirements match the pass placement and it reduces code size
+// too.
+static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
+ const SIInstrInfo *TII) {
+ assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MovT.getOpcode() == AMDGPU::COPY);
+
+ unsigned T = MovT.getOperand(0).getReg();
+ unsigned Tsub = MovT.getOperand(0).getSubReg();
+ MachineOperand &Xop = MovT.getOperand(1);
+
+ if (!Xop.isReg())
+ return nullptr;
+ unsigned X = Xop.getReg();
+ unsigned Xsub = Xop.getSubReg();
+
+ unsigned Size = TII->getOpSize(MovT, 0) / 4;
+
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ if (!TRI.isVGPR(MRI, X))
+ return nullptr;
+
+ for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
+ if (YTop.getSubReg() != Tsub)
+ continue;
+
+ MachineInstr &MovY = *YTop.getParent();
+ if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ MovY.getOpcode() != AMDGPU::COPY) ||
+ MovY.getOperand(1).getSubReg() != Tsub)
+ continue;
+
+ unsigned Y = MovY.getOperand(0).getReg();
+ unsigned Ysub = MovY.getOperand(0).getSubReg();
+
+ if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
+ continue;
+
+ MachineInstr *MovX = nullptr;
+ auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
+ for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
+ if (instReadsReg(&*I, X, Xsub, TRI) ||
+ instModifiesReg(&*I, Y, Ysub, TRI) ||
+ instModifiesReg(&*I, T, Tsub, TRI) ||
+ (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
+ MovX = nullptr;
+ break;
+ }
+ if (!instReadsReg(&*I, Y, Ysub, TRI)) {
+ if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
+ MovX = nullptr;
+ break;
+ }
+ continue;
+ }
+ if (MovX ||
+ (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ I->getOpcode() != AMDGPU::COPY) ||
+ I->getOperand(0).getReg() != X ||
+ I->getOperand(0).getSubReg() != Xsub) {
+ MovX = nullptr;
+ break;
+ }
+ MovX = &*I;
+ }
+
+ if (!MovX || I == E)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
+
+ for (unsigned I = 0; I < Size; ++I) {
+ TargetInstrInfo::RegSubRegPair X1, Y1;
+ X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
+ Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
+ BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
+ TII->get(AMDGPU::V_SWAP_B32))
+ .addDef(X1.Reg, 0, X1.SubReg)
+ .addDef(Y1.Reg, 0, Y1.SubReg)
+ .addReg(Y1.Reg, 0, Y1.SubReg)
+ .addReg(X1.Reg, 0, X1.SubReg).getInstr();
+ }
+ MovX->eraseFromParent();
+ MovY.eraseFromParent();
+ MachineInstr *Next = &*std::next(MovT.getIterator());
+ if (MRI.use_nodbg_empty(T))
+ MovT.eraseFromParent();
+ else
+ Xop.setIsKill(false);
+
+ return Next;
+ }
+
+ return nullptr;
+}
+
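At the value level the matched triangle of moves is just a swap through a temporary, which is why the middle and last moves can be replaced by v_swap_b32 while the first move survives only if the temporary has other readers. A trivial standalone illustration of that equivalence:

    // Value-level picture of the pattern matchSwap rewrites:
    //   t = x;  x = y;  y = t;   // three moves through a temporary
    // becomes
    //   t = x;                   // kept only if t has other uses
    //   swap(x, y);              // v_swap_b32
    #include <cassert>
    #include <utility>

    int main() {
      unsigned X = 7, Y = 42;

      // Original sequence.
      unsigned X1 = X, Y1 = Y;
      unsigned T = X1;
      X1 = Y1;
      Y1 = T;

      // Rewritten sequence.
      unsigned X2 = X, Y2 = Y;
      unsigned T2 = X2;  // dead unless t had other readers
      std::swap(X2, Y2); // v_swap_b32 x, y

      assert(X1 == X2 && Y1 == Y2 && T == T2);
      return 0;
    }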
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -285,7 +458,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
std::vector<unsigned> I1Defs;
@@ -319,6 +491,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MI.getOpcode() == AMDGPU::COPY)) {
+ if (auto *NextMI = matchSwap(MI, MRI, TII)) {
+ Next = NextMI->getIterator();
+ continue;
+ }
+ }
+
// Combine adjacent s_nops to use the immediate operand encoding how long
// to wait.
//
@@ -408,14 +588,22 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ // Shrink scalar logic operations.
+ if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
+ MI.getOpcode() == AMDGPU::S_OR_B32 ||
+ MI.getOpcode() == AMDGPU::S_XOR_B32) {
+ if (shrinkScalarLogicOp(ST, MRI, TII, MI))
+ continue;
+ }
+
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
- if (!canShrink(MI, TII, TRI, MRI)) {
+ if (!TII->canShrink(MI, MRI)) {
// Try commuting the instruction and see if that enables us to shrink
// it.
if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
- !canShrink(MI, TII, TRI, MRI))
+ !TII->canShrink(MI, MRI))
continue;
}
@@ -488,40 +676,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// We can shrink this instruction
LLVM_DEBUG(dbgs() << "Shrinking " << MI);
- MachineInstrBuilder Inst32 =
- BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
-
- // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
- // For VOPC instructions, this is replaced by an implicit def of vcc.
- int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
- if (Op32DstIdx != -1) {
- // dst
- Inst32.add(MI.getOperand(0));
- } else {
- assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
- "Unexpected case");
- }
-
-
- Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
-
- const MachineOperand *Src1 =
- TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Src1)
- Inst32.add(*Src1);
-
- if (Src2) {
- int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
- if (Op32Src2Idx != -1) {
- Inst32.add(*Src2);
- } else {
- // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
- // replaced with an implicit read of vcc. This was already added
- // during the initial BuildMI, so find it to preserve the flags.
- copyFlagsToImplicitVCC(*Inst32, *Src2);
- }
- }
-
+ MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
++NumInstructionsShrunk;
// Copy extra operands not present in the instruction definition.
diff --git a/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
index 7485326017b2..8a063e1a4867 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -375,83 +375,6 @@ defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
}
//===----------------------------------------------------------------------===//
-// Scalar Memory Patterns
-//===----------------------------------------------------------------------===//
-
-
-def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
- auto Ld = cast<LoadSDNode>(N);
- return Ld->getAlignment() >= 4 &&
- ((((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) || (Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) ||
- (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
- !Ld->isVolatile() && !N->isDivergent() &&
- static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
-}]>;
-
-def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
-def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
-def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
-def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
-def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
-
-multiclass SMRD_Pattern <string Instr, ValueType vt> {
-
- // 1. IMM offset
- def : GCNPat <
- (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
- >;
-
- // 2. SGPR offset
- def : GCNPat <
- (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
- >;
-}
-
-let OtherPredicates = [isSICI] in {
-def : GCNPat <
- (i64 (readcyclecounter)),
- (S_MEMTIME)
->;
-}
-
-// Global and constant loads can be selected to either MUBUF or SMRD
-// instructions, but SMRD instructions are faster so we want the instruction
-// selector to prefer those.
-let AddedComplexity = 100 in {
-
-defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
-
-// 1. Offset as an immediate
-def SM_LOAD_PATTERN : GCNPat < // name this pattern to reuse AddedComplexity on CI
- (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0)
->;
-
-// 2. Offset loaded in an 32bit SGPR
-def : GCNPat <
- (SIload_constant v4i32:$sbase, i32:$offset),
- (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0)
->;
-
-} // End let AddedComplexity = 100
-
-let OtherPredicates = [isVI] in {
-
-def : GCNPat <
- (i64 (readcyclecounter)),
- (S_MEMREALTIME)
->;
-
-} // let OtherPredicates = [isVI]
-
-
-//===----------------------------------------------------------------------===//
// Targets
//===----------------------------------------------------------------------===//
@@ -757,25 +680,97 @@ class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
-let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in {
+//===----------------------------------------------------------------------===//
+// Scalar Memory Patterns
+//===----------------------------------------------------------------------===//
+
+def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]>;
+
+def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
+def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
+def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
+def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
+def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
+
+multiclass SMRD_Pattern <string Instr, ValueType vt> {
+
+ // 1. IMM offset
+ def : GCNPat <
+ (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
+ >;
+
+ // 2. 32-bit IMM offset on CI
+ def : GCNPat <
+ (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
+ (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
+ let OtherPredicates = [isCIOnly];
+ }
-class SMRD_Pattern_ci <string Instr, ValueType vt> : GCNPat <
- (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
- (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
- let OtherPredicates = [isCIOnly];
+ // 3. SGPR offset
+ def : GCNPat <
+ (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+ >;
}
-def : SMRD_Pattern_ci <"S_LOAD_DWORD", i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX2", v2i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX4", v4i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX8", v8i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>;
+multiclass SMLoad_Pattern <string Instr, ValueType vt> {
+ // 1. Offset as an immediate
+ def : GCNPat <
+ (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc)))
+ >;
+ // 2. 32-bit IMM offset on CI
+ def : GCNPat <
+ (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)),
+ (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> {
+ let OtherPredicates = [isCIOnly];
+ }
+
+  // 3. Offset loaded in a 32-bit SGPR
+ def : GCNPat <
+ (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc)))
+ >;
+}
+
+// Global and constant loads can be selected to either MUBUF or SMRD
+// instructions, but SMRD instructions are faster so we want the instruction
+// selector to prefer those.
+let AddedComplexity = 100 in {
+
+defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
+
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>;
+
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
+} // End let AddedComplexity = 100
+
+let OtherPredicates = [isSICI] in {
def : GCNPat <
- (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
- (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> {
- let OtherPredicates = [isCI]; // should this be isCIOnly?
+ (i64 (readcyclecounter)),
+ (S_MEMTIME)
+>;
}
-} // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity
+let OtherPredicates = [isVI] in {
+def : GCNPat <
+ (i64 (readcyclecounter)),
+ (S_MEMREALTIME)
+>;
+
+} // let OtherPredicates = [isVI]
diff --git a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 6f5db9644c86..ca5e981ac5c2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -336,42 +336,54 @@ class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
"$sdst, $src0, $src1", pattern
>;
+class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0),
+ (Op $src0),
+ [{ return !N->isDivergent(); }]
+>;
+
+class UniformBinFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0, node:$src1),
+ (Op $src0, $src1),
+ [{ return !N->isDivergent(); }]
+>;
+
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
def S_ADD_U32 : SOP2_32 <"s_add_u32">;
def S_ADD_I32 : SOP2_32 <"s_add_i32",
- [(set i32:$sdst, (add SSrc_b32:$src0, SSrc_b32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<add> SSrc_b32:$src0, SSrc_b32:$src1))]
>;
} // End isCommutable = 1
def S_SUB_U32 : SOP2_32 <"s_sub_u32">;
def S_SUB_I32 : SOP2_32 <"s_sub_i32",
- [(set i32:$sdst, (sub SSrc_b32:$src0, SSrc_b32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<sub> SSrc_b32:$src0, SSrc_b32:$src1))]
>;
let Uses = [SCC] in { // Carry in comes from SCC
let isCommutable = 1 in {
def S_ADDC_U32 : SOP2_32 <"s_addc_u32",
- [(set i32:$sdst, (adde (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
+ [(set i32:$sdst, (UniformBinFrag<adde> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
} // End isCommutable = 1
def S_SUBB_U32 : SOP2_32 <"s_subb_u32",
- [(set i32:$sdst, (sube (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
+ [(set i32:$sdst, (UniformBinFrag<sube> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
} // End Uses = [SCC]
let isCommutable = 1 in {
def S_MIN_I32 : SOP2_32 <"s_min_i32",
- [(set i32:$sdst, (smin i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<smin> i32:$src0, i32:$src1))]
>;
def S_MIN_U32 : SOP2_32 <"s_min_u32",
- [(set i32:$sdst, (umin i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<umin> i32:$src0, i32:$src1))]
>;
def S_MAX_I32 : SOP2_32 <"s_max_i32",
- [(set i32:$sdst, (smax i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<smax> i32:$src0, i32:$src1))]
>;
def S_MAX_U32 : SOP2_32 <"s_max_u32",
- [(set i32:$sdst, (umax i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<umax> i32:$src0, i32:$src1))]
>;
} // End isCommutable = 1
} // End Defs = [SCC]
@@ -385,27 +397,27 @@ let Uses = [SCC] in {
let Defs = [SCC] in {
let isCommutable = 1 in {
def S_AND_B32 : SOP2_32 <"s_and_b32",
- [(set i32:$sdst, (and i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<and> i32:$src0, i32:$src1))]
>;
def S_AND_B64 : SOP2_64 <"s_and_b64",
- [(set i64:$sdst, (and i64:$src0, i64:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<and> i64:$src0, i64:$src1))]
>;
def S_OR_B32 : SOP2_32 <"s_or_b32",
- [(set i32:$sdst, (or i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<or> i32:$src0, i32:$src1))]
>;
def S_OR_B64 : SOP2_64 <"s_or_b64",
- [(set i64:$sdst, (or i64:$src0, i64:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<or> i64:$src0, i64:$src1))]
>;
def S_XOR_B32 : SOP2_32 <"s_xor_b32",
- [(set i32:$sdst, (xor i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<xor> i32:$src0, i32:$src1))]
>;
def S_XOR_B64 : SOP2_64 <"s_xor_b64",
- [(set i64:$sdst, (xor i64:$src0, i64:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<xor> i64:$src0, i64:$src1))]
>;
def S_XNOR_B32 : SOP2_32 <"s_xnor_b32",
@@ -415,45 +427,71 @@ def S_XNOR_B32 : SOP2_32 <"s_xnor_b32",
def S_XNOR_B64 : SOP2_64 <"s_xnor_b64",
[(set i64:$sdst, (not (xor_oneuse i64:$src0, i64:$src1)))]
>;
+
+def S_NAND_B32 : SOP2_32 <"s_nand_b32",
+ [(set i32:$sdst, (not (and_oneuse i32:$src0, i32:$src1)))]
+>;
+
+def S_NAND_B64 : SOP2_64 <"s_nand_b64",
+ [(set i64:$sdst, (not (and_oneuse i64:$src0, i64:$src1)))]
+>;
+
+def S_NOR_B32 : SOP2_32 <"s_nor_b32",
+ [(set i32:$sdst, (not (or_oneuse i32:$src0, i32:$src1)))]
+>;
+
+def S_NOR_B64 : SOP2_64 <"s_nor_b64",
+ [(set i64:$sdst, (not (or_oneuse i64:$src0, i64:$src1)))]
+>;
} // End isCommutable = 1
-def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32">;
-def S_ANDN2_B64 : SOP2_64 <"s_andn2_b64">;
-def S_ORN2_B32 : SOP2_32 <"s_orn2_b32">;
-def S_ORN2_B64 : SOP2_64 <"s_orn2_b64">;
-def S_NAND_B32 : SOP2_32 <"s_nand_b32">;
-def S_NAND_B64 : SOP2_64 <"s_nand_b64">;
-def S_NOR_B32 : SOP2_32 <"s_nor_b32">;
-def S_NOR_B64 : SOP2_64 <"s_nor_b64">;
+def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32",
+ [(set i32:$sdst, (UniformBinFrag<and> i32:$src0, (UniformUnaryFrag<not> i32:$src1)))]
+>;
+
+def S_ANDN2_B64 : SOP2_64 <"s_andn2_b64",
+ [(set i64:$sdst, (UniformBinFrag<and> i64:$src0, (UniformUnaryFrag<not> i64:$src1)))]
+>;
+
+def S_ORN2_B32 : SOP2_32 <"s_orn2_b32",
+ [(set i32:$sdst, (UniformBinFrag<or> i32:$src0, (UniformUnaryFrag<not> i32:$src1)))]
+>;
+
+def S_ORN2_B64 : SOP2_64 <"s_orn2_b64",
+ [(set i64:$sdst, (UniformBinFrag<or> i64:$src0, (UniformUnaryFrag<not> i64:$src1)))]
+>;
} // End Defs = [SCC]
// Use added complexity so these patterns are preferred to the VALU patterns.
let AddedComplexity = 1 in {
let Defs = [SCC] in {
+// TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3
def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
- [(set i32:$sdst, (shl i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<shl> i32:$src0, i32:$src1))]
>;
def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
- [(set i64:$sdst, (shl i64:$src0, i32:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<shl> i64:$src0, i32:$src1))]
>;
def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
- [(set i32:$sdst, (srl i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))]
>;
def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
- [(set i64:$sdst, (srl i64:$src0, i32:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<srl> i64:$src0, i32:$src1))]
>;
def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
- [(set i32:$sdst, (sra i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))]
>;
def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
- [(set i64:$sdst, (sra i64:$src0, i32:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<sra> i64:$src0, i32:$src1))]
>;
} // End Defs = [SCC]
def S_BFM_B32 : SOP2_32 <"s_bfm_b32",
- [(set i32:$sdst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
+ [(set i32:$sdst, (UniformBinFrag<AMDGPUbfm> i32:$src0, i32:$src1))]>;
def S_BFM_B64 : SOP2_64_32_32 <"s_bfm_b64">;
+
+// TODO: S_MUL_I32 require V_MUL_LO_I32 from VOP3 change
def S_MUL_I32 : SOP2_32 <"s_mul_i32",
[(set i32:$sdst, (mul i32:$src0, i32:$src1))]> {
let isCommutable = 1;
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4eba19382315..54c866bdc63c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -128,6 +128,49 @@ int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
return NewInfo ? NewInfo->Opcode : -1;
}
+struct MUBUFInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t dwords;
+ bool has_vaddr;
+ bool has_srsrc;
+ bool has_soffset;
+};
+
+#define GET_MUBUFInfoTable_DECL
+#define GET_MUBUFInfoTable_IMPL
+#include "AMDGPUGenSearchableTables.inc"
+
+int getMUBUFBaseOpcode(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
+ return Info ? Info->BaseOpcode : -1;
+}
+
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) {
+ const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords);
+ return Info ? Info->Opcode : -1;
+}
+
+int getMUBUFDwords(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->dwords : 0;
+}
+
+bool getMUBUFHasVAddr(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_vaddr : false;
+}
+
+bool getMUBUFHasSrsrc(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_srsrc : false;
+}
+
+bool getMUBUFHasSoffset(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_soffset : false;
+}
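The TableGen-generated lookups used above boil down to keyed searches over a fixed table. A hand-rolled stand-in for the generated accessors (the opcode numbers below are made up for the example, not real AMDGPU encodings):

    // Sketch of what getMUBUFInfoFromOpcode / ...FromBaseOpcodeAndDwords
    // amount to once TableGen has emitted the table.
    #include <cassert>
    #include <cstdint>

    struct MUBUFInfo {
      uint16_t Opcode;
      uint16_t BaseOpcode;
      uint8_t Dwords;
      bool HasVAddr, HasSrsrc, HasSoffset;
    };

    static const MUBUFInfo Table[] = {
        {100, 100, 1, true, true, true}, // e.g. buffer_load_dword
        {101, 100, 2, true, true, true}, // e.g. buffer_load_dwordx2
        {102, 100, 4, true, true, true}, // e.g. buffer_load_dwordx4
    };

    static const MUBUFInfo *fromOpcode(unsigned Opc) {
      for (const MUBUFInfo &I : Table)
        if (I.Opcode == Opc)
          return &I;
      return nullptr;
    }

    static const MUBUFInfo *fromBaseAndDwords(unsigned Base, unsigned Dwords) {
      for (const MUBUFInfo &I : Table)
        if (I.BaseOpcode == Base && I.Dwords == Dwords)
          return &I;
      return nullptr;
    }

    int main() {
      assert(fromOpcode(101) && fromOpcode(101)->Dwords == 2);
      assert(fromBaseAndDwords(100, 4) && fromBaseAndDwords(100, 4)->Opcode == 102);
      assert(!fromOpcode(999));
      return 0;
    }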
+
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
@@ -137,122 +180,75 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
namespace IsaInfo {
-IsaVersion getIsaVersion(const FeatureBitset &Features) {
- // GCN GFX6 (Southern Islands (SI)).
- if (Features.test(FeatureISAVersion6_0_0))
- return {6, 0, 0};
- if (Features.test(FeatureISAVersion6_0_1))
- return {6, 0, 1};
-
- // GCN GFX7 (Sea Islands (CI)).
- if (Features.test(FeatureISAVersion7_0_0))
- return {7, 0, 0};
- if (Features.test(FeatureISAVersion7_0_1))
- return {7, 0, 1};
- if (Features.test(FeatureISAVersion7_0_2))
- return {7, 0, 2};
- if (Features.test(FeatureISAVersion7_0_3))
- return {7, 0, 3};
- if (Features.test(FeatureISAVersion7_0_4))
- return {7, 0, 4};
- if (Features.test(FeatureSeaIslands))
- return {7, 0, 0};
-
- // GCN GFX8 (Volcanic Islands (VI)).
- if (Features.test(FeatureISAVersion8_0_1))
- return {8, 0, 1};
- if (Features.test(FeatureISAVersion8_0_2))
- return {8, 0, 2};
- if (Features.test(FeatureISAVersion8_0_3))
- return {8, 0, 3};
- if (Features.test(FeatureISAVersion8_1_0))
- return {8, 1, 0};
- if (Features.test(FeatureVolcanicIslands))
- return {8, 0, 0};
-
- // GCN GFX9.
- if (Features.test(FeatureISAVersion9_0_0))
- return {9, 0, 0};
- if (Features.test(FeatureISAVersion9_0_2))
- return {9, 0, 2};
- if (Features.test(FeatureISAVersion9_0_4))
- return {9, 0, 4};
- if (Features.test(FeatureISAVersion9_0_6))
- return {9, 0, 6};
- if (Features.test(FeatureGFX9))
- return {9, 0, 0};
-
- if (Features.test(FeatureSouthernIslands))
- return {0, 0, 0};
- return {7, 0, 0};
-}
-
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
auto TargetTriple = STI->getTargetTriple();
- auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
+ auto Version = getIsaVersion(STI->getCPU());
Stream << TargetTriple.getArchName() << '-'
<< TargetTriple.getVendorName() << '-'
<< TargetTriple.getOSName() << '-'
<< TargetTriple.getEnvironmentName() << '-'
<< "gfx"
- << ISAVersion.Major
- << ISAVersion.Minor
- << ISAVersion.Stepping;
+ << Version.Major
+ << Version.Minor
+ << Version.Stepping;
if (hasXNACK(*STI))
Stream << "+xnack";
+ if (hasSRAMECC(*STI))
+ Stream << "+sram-ecc";
Stream.flush();
}
bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
- return STI->getFeatureBits().test(FeatureCodeObjectV3);
+ return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
+ STI->getFeatureBits().test(FeatureCodeObjectV3);
}
-unsigned getWavefrontSize(const FeatureBitset &Features) {
- if (Features.test(FeatureWavefrontSize16))
+unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize16))
return 16;
- if (Features.test(FeatureWavefrontSize32))
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32))
return 32;
return 64;
}
-unsigned getLocalMemorySize(const FeatureBitset &Features) {
- if (Features.test(FeatureLocalMemorySize32768))
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
return 32768;
- if (Features.test(FeatureLocalMemorySize65536))
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
return 65536;
return 0;
}
-unsigned getEUsPerCU(const FeatureBitset &Features) {
+unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
return 4;
}
-unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- if (!Features.test(FeatureGCN))
+ if (!STI->getFeatureBits().test(FeatureGCN))
return 8;
- unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
if (N == 1)
return 40;
N = 40 / N;
return std::min(N, 16u);
}
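A worked example of the occupancy arithmetic in getWavesPerWorkGroup and getMaxWorkGroupsPerCU, assuming 64-wide wavefronts and the 40-wave cap the code above applies for single-wave groups:

    // A flat work-group size of 256 with 64-wide wavefronts needs 4 waves, so
    // min(40 / 4, 16) = 10 such work-groups fit on a CU.
    #include <algorithm>
    #include <cassert>

    static unsigned alignTo(unsigned V, unsigned A) { return (V + A - 1) / A * A; }

    static unsigned wavesPerWorkGroup(unsigned FlatWGSize, unsigned WaveSize) {
      return alignTo(FlatWGSize, WaveSize) / WaveSize;
    }

    static unsigned maxWorkGroupsPerCU(unsigned FlatWGSize, unsigned WaveSize) {
      unsigned N = wavesPerWorkGroup(FlatWGSize, WaveSize);
      if (N == 1)
        return 40;
      return std::min(40 / N, 16u);
    }

    int main() {
      assert(wavesPerWorkGroup(256, 64) == 4);
      assert(maxWorkGroupsPerCU(256, 64) == 10);
      assert(maxWorkGroupsPerCU(64, 64) == 40);  // single-wave groups
      assert(maxWorkGroupsPerCU(130, 64) == 13); // 3 waves -> 40/3 = 13
      return 0;
    }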
-unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
- return getMaxWavesPerEU() * getEUsPerCU(Features);
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
+ return getMaxWavesPerEU() * getEUsPerCU(STI);
}
-unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
}
-unsigned getMinWavesPerEU(const FeatureBitset &Features) {
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
return 1;
}
@@ -261,89 +257,89 @@ unsigned getMaxWavesPerEU() {
return 10;
}
-unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
- getEUsPerCU(Features)) / getEUsPerCU(Features);
+ return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
+ getEUsPerCU(STI)) / getEUsPerCU(STI);
}
-unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 1;
}
-unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 2048;
}
-unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
- getWavefrontSize(Features);
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
+ getWavefrontSize(STI);
}
-unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
- IsaVersion Version = getIsaVersion(Features);
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 16;
return 8;
}
-unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
return 8;
}
-unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
- IsaVersion Version = getIsaVersion(Features);
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 800;
return 512;
}
-unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
- if (Features.test(FeatureSGPRInitBug))
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureSGPRInitBug))
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
- IsaVersion Version = getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 102;
return 104;
}
-unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
- unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
- if (Features.test(FeatureTrapHandler))
+ unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
- return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
+ MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}
-unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool Addressable) {
assert(WavesPerEU != 0);
- IsaVersion Version = getIsaVersion(Features);
- unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
- unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
- if (Features.test(FeatureTrapHandler))
+ unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
+ MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed) {
unsigned ExtraSGPRs = 0;
if (VCCUsed)
ExtraSGPRs = 2;
- IsaVersion Version = getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major < 8) {
if (FlatScrUsed)
ExtraSGPRs = 4;
@@ -358,74 +354,74 @@ unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
return ExtraSGPRs;
}
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed) {
- return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
- Features[AMDGPU::FeatureXNACK]);
+ return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
+ STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}
-unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
- NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
+ NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
// SGPRBlocks is actual number of SGPR blocks minus 1.
- return NumSGPRs / getSGPREncodingGranule(Features) - 1;
+ return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
-unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
return 4;
}
-unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
- return getVGPRAllocGranule(Features);
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
+ return getVGPRAllocGranule(STI);
}
-unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
return 256;
}
-unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
- return getTotalNumVGPRs(Features);
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
+ return getTotalNumVGPRs(STI);
}
-unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumVGPRs =
- alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
- getVGPRAllocGranule(Features)) + 1;
- return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
+ alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
+ getVGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}
-unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
- getVGPRAllocGranule(Features));
- unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
+ unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
+ getVGPRAllocGranule(STI));
+ unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
-unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
- NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
+ NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
// VGPRBlocks is actual number of VGPR blocks minus 1.
- return NumVGPRs / getVGPREncodingGranule(Features) - 1;
+ return NumVGPRs / getVGPREncodingGranule(STI) - 1;
}
} // end namespace IsaInfo
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const FeatureBitset &Features) {
- IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
+ const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
Header.amd_kernel_code_version_minor = 2;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
- Header.amd_machine_version_major = ISA.Major;
- Header.amd_machine_version_minor = ISA.Minor;
- Header.amd_machine_version_stepping = ISA.Stepping;
+ Header.amd_machine_version_major = Version.Major;
+ Header.amd_machine_version_minor = Version.Minor;
+ Header.amd_machine_version_stepping = Version.Stepping;
Header.kernel_code_entry_byte_offset = sizeof(Header);
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
@@ -513,7 +509,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Ints;
}
-unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getVmcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
if (Version.Major < 9)
return VmcntLo;
@@ -522,15 +518,15 @@ unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
return VmcntLo | VmcntHi;
}
-unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getExpcntBitMask(const IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
-unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getLgkmcntBitMask(const IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
-unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getWaitcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
@@ -542,7 +538,7 @@ unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
return Waitcnt | VmcntHi;
}
-unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
unsigned VmcntLo =
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
if (Version.Major < 9)
@@ -554,22 +550,30 @@ unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return VmcntLo | VmcntHi;
}
-unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
-unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
+ Waitcnt Decoded;
+ Decoded.VmCnt = decodeVmcnt(Version, Encoded);
+ Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
+ Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
+ return Decoded;
+}
+
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt) {
Waitcnt =
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
@@ -580,17 +584,17 @@ unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
-unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
@@ -599,6 +603,10 @@ unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
return Waitcnt;
}
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
+ return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
+}
+
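The two overloads just added give a struct-based round trip over the per-counter encoders; a minimal usage sketch, assuming AMDGPUBaseInfo.h is included and that tightening vmcnt is what the caller wants:

    // Decode an s_waitcnt immediate, additionally require vmcnt == 0, re-encode.
    unsigned tightenVmcnt(const llvm::AMDGPU::IsaVersion &Version, unsigned Imm) {
      using namespace llvm::AMDGPU;
      Waitcnt W = decodeWaitcnt(Version, Imm);  // unpack vmcnt/expcnt/lgkmcnt
      W = W.combined(Waitcnt(0, ~0u, ~0u));     // component-wise min adds the vmcnt(0) wait
      return encodeWaitcnt(Version, W);         // pack back into an immediate
    }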
unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
@@ -643,6 +651,10 @@ bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}
+bool hasSRAMECC(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
+}
+
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}
@@ -798,6 +810,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VS_64RegClassID:
case AMDGPU::SReg_64RegClassID:
case AMDGPU::VReg_64RegClassID:
+ case AMDGPU::SReg_64_XEXECRegClassID:
return 64;
case AMDGPU::VReg_96RegClassID:
return 96;
@@ -935,27 +948,50 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
-} // end namespace AMDGPU
-
-} // end namespace llvm
-
-namespace llvm {
-namespace AMDGPU {
-
-AMDGPUAS getAMDGPUAS(Triple T) {
- AMDGPUAS AS;
- AS.FLAT_ADDRESS = 0;
- AS.PRIVATE_ADDRESS = 5;
- AS.REGION_ADDRESS = 2;
- return AS;
-}
+// Given Imm, split it into the values to put into the SOffset and ImmOffset
+// fields in an MUBUF instruction. Return false if it is not possible (due to a
+// hardware bug needing a workaround).
+//
+// The required alignment ensures that individual address components remain
+// aligned if they are aligned to begin with. It also ensures that additional
+// offsets within the given alignment can be added to the resulting ImmOffset.
+bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ const GCNSubtarget *Subtarget, uint32_t Align) {
+ const uint32_t MaxImm = alignDown(4095, Align);
+ uint32_t Overflow = 0;
+
+ if (Imm > MaxImm) {
+ if (Imm <= MaxImm + 64) {
+ // Use an SOffset inline constant for 4..64
+ Overflow = Imm - MaxImm;
+ Imm = MaxImm;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits (except for alignment bits) set into
+ // SOffset, so that a larger range of values can be covered using
+ // s_movk_i32.
+ //
+ // Atomic operations fail to work correctly when individual address
+ // components are unaligned, even if their sum is aligned.
+ uint32_t High = (Imm + Align) & ~4095;
+ uint32_t Low = (Imm + Align) & 4095;
+ Imm = Low;
+ Overflow = High - Align;
+ }
+ }
-AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
- return getAMDGPUAS(M.getTargetTriple());
-}
+ // There is a hardware bug in SI and CI which prevents address clamping in
+ // MUBUF instructions from working correctly with SOffsets. The immediate
+ // offset is unaffected.
+ if (Overflow > 0 &&
+ Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
-AMDGPUAS getAMDGPUAS(const Module &M) {
- return getAMDGPUAS(Triple(M.getTargetTriple()));
+ ImmOffset = Imm;
+ SOffset = Overflow;
+ return true;
}
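A worked example of the split above: with the default Align of 4, MaxImm is alignDown(4095, 4) = 4092, so an immediate of 5000 takes the large-overflow path and comes back as SOffset = 4092 and ImmOffset = 908; the two still sum to 5000 and both stay 4-byte aligned, while on SI/CI the non-zero SOffset would instead make the helper return false because of the hardware bug noted above. A minimal check of those numbers, assuming <cassert> is included and ST is any post-CI GCNSubtarget:

    void checkSplitExample(const GCNSubtarget &ST) {
      uint32_t SOffset = 0, ImmOffset = 0;
      bool Ok = llvm::AMDGPU::splitMUBUFOffset(5000, SOffset, ImmOffset, &ST);
      assert(Ok && SOffset == 4092 && ImmOffset == 908); // 4092 + 908 == 5000
      (void)Ok;
    }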
namespace {
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 5b7af8268cda..20123ed4ac81 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -19,6 +19,7 @@
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>
@@ -26,8 +27,10 @@
namespace llvm {
class Argument;
+class AMDGPUSubtarget;
class FeatureBitset;
class Function;
+class GCNSubtarget;
class GlobalValue;
class MCContext;
class MCRegisterClass;
@@ -54,16 +57,6 @@ enum {
TRAP_NUM_SGPRS = 16
};
-/// Instruction set architecture version.
-struct IsaVersion {
- unsigned Major;
- unsigned Minor;
- unsigned Stepping;
-};
-
-/// \returns Isa version for given subtarget \p Features.
-IsaVersion getIsaVersion(const FeatureBitset &Features);
-
/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
@@ -71,114 +64,114 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);
-/// \returns Wavefront size for given subtarget \p Features.
-unsigned getWavefrontSize(const FeatureBitset &Features);
+/// \returns Wavefront size for given subtarget \p STI.
+unsigned getWavefrontSize(const MCSubtargetInfo *STI);
-/// \returns Local memory size in bytes for given subtarget \p Features.
-unsigned getLocalMemorySize(const FeatureBitset &Features);
+/// \returns Local memory size in bytes for given subtarget \p STI.
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
/// \returns Number of execution units per compute unit for given subtarget \p
-/// Features.
-unsigned getEUsPerCU(const FeatureBitset &Features);
+/// STI.
+unsigned getEUsPerCU(const MCSubtargetInfo *STI);
/// \returns Maximum number of work groups per compute unit for given subtarget
-/// \p Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+/// \p STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// Features without any kind of limitation.
-unsigned getMaxWavesPerCU(const FeatureBitset &Features);
+/// STI without any kind of limitation.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
/// \returns Minimum number of waves per execution unit for given subtarget \p
-/// Features.
-unsigned getMinWavesPerEU(const FeatureBitset &Features);
+/// STI.
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// Features without any kind of limitation.
+/// STI without any kind of limitation.
unsigned getMaxWavesPerEU();
/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
-/// \returns Minimum flat work group size for given subtarget \p Features.
-unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
+/// \returns Minimum flat work group size for given subtarget \p STI.
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
-/// \returns Maximum flat work group size for given subtarget \p Features.
-unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
+/// \returns Maximum flat work group size for given subtarget \p STI.
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
-/// \returns Number of waves per work group for given subtarget \p Features and
+/// \returns Number of waves per work group for given subtarget \p STI and
/// limited by given \p FlatWorkGroupSize.
-unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
-/// \returns SGPR allocation granularity for given subtarget \p Features.
-unsigned getSGPRAllocGranule(const FeatureBitset &Features);
+/// \returns SGPR allocation granularity for given subtarget \p STI.
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
-/// \returns SGPR encoding granularity for given subtarget \p Features.
-unsigned getSGPREncodingGranule(const FeatureBitset &Features);
+/// \returns SGPR encoding granularity for given subtarget \p STI.
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
-/// \returns Total number of SGPRs for given subtarget \p Features.
-unsigned getTotalNumSGPRs(const FeatureBitset &Features);
+/// \returns Total number of SGPRs for given subtarget \p STI.
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
-/// \returns Addressable number of SGPRs for given subtarget \p Features.
-unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
+/// \returns Addressable number of SGPRs for given subtarget \p STI.
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
/// \returns Minimum number of SGPRs that meets the given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Maximum number of SGPRs that meets the given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool Addressable);
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
-/// Features when the given special registers are used.
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+/// STI when the given special registers are used.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed);
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
-/// Features when the given special registers are used. XNACK is inferred from
-/// \p Features.
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+/// STI when the given special registers are used. XNACK is inferred from
+/// \p STI.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed);
-/// \returns Number of SGPR blocks needed for given subtarget \p Features when
+/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
-unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
-/// \returns VGPR allocation granularity for given subtarget \p Features.
-unsigned getVGPRAllocGranule(const FeatureBitset &Features);
+/// \returns VGPR allocation granularity for given subtarget \p STI.
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
-/// \returns VGPR encoding granularity for given subtarget \p Features.
-unsigned getVGPREncodingGranule(const FeatureBitset &Features);
+/// \returns VGPR encoding granularity for given subtarget \p STI.
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
-/// \returns Total number of VGPRs for given subtarget \p Features.
-unsigned getTotalNumVGPRs(const FeatureBitset &Features);
+/// \returns Total number of VGPRs for given subtarget \p STI.
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
-/// \returns Addressable number of VGPRs for given subtarget \p Features.
-unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
+/// \returns Addressable number of VGPRs for given subtarget \p STI.
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
/// \returns Minimum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Maximum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
-/// \returns Number of VGPR blocks needed for given subtarget \p Features when
+/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
-unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
} // end namespace IsaInfo
@@ -191,6 +184,7 @@ struct MIMGBaseOpcodeInfo {
bool Atomic;
bool AtomicX2;
bool Sampler;
+ bool Gather4;
uint8_t NumExtraArgs;
bool Gradients;
@@ -228,10 +222,28 @@ LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
LLVM_READONLY
+int getMUBUFBaseOpcode(unsigned Opc);
+
+LLVM_READONLY
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords);
+
+LLVM_READONLY
+int getMUBUFDwords(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasVAddr(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasSrsrc(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasSoffset(unsigned Opc);
+
+LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const FeatureBitset &Features);
+ const MCSubtargetInfo *STI);
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
@@ -265,26 +277,52 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
std::pair<int, int> Default,
bool OnlyFirstRequired = false);
+/// Represents the counter values to wait for in an s_waitcnt instruction.
+///
+/// Large values (including the maximum possible integer) can be used to
+/// represent "don't care" waits.
+struct Waitcnt {
+ unsigned VmCnt = ~0u;
+ unsigned ExpCnt = ~0u;
+ unsigned LgkmCnt = ~0u;
+
+ Waitcnt() {}
+ Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
+ : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}
+
+ static Waitcnt allZero() { return Waitcnt(0, 0, 0); }
+
+ bool dominates(const Waitcnt &Other) const {
+ return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
+ LgkmCnt <= Other.LgkmCnt;
+ }
+
+ Waitcnt combined(const Waitcnt &Other) const {
+ return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
+ std::min(LgkmCnt, Other.LgkmCnt));
+ }
+};
+
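Smaller counter values mean stricter waits (the instruction waits until each counter has dropped to the given value), so dominates() is a component-wise <= and combined() a component-wise min. A minimal sketch of the intended use, assuming <cassert> and this header are available:

    void waitcntSemanticsSketch() {
      using namespace llvm::AMDGPU;
      Waitcnt Strict(0, ~0u, ~0u);             // wait for vmcnt == 0, don't care otherwise
      Waitcnt Loose(3, ~0u, ~0u);              // wait for vmcnt <= 3 only
      assert(Strict.dominates(Loose));         // the stricter wait subsumes the looser one
      Waitcnt Merged = Strict.combined(Loose); // component-wise min, equal to Strict here
      assert(Merged.dominates(Strict) && Merged.dominates(Loose));
    }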
/// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getVmcntBitMask(const IsaVersion &Version);
/// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getExpcntBitMask(const IsaVersion &Version);
/// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getLgkmcntBitMask(const IsaVersion &Version);
/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getWaitcntBitMask(const IsaVersion &Version);
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
@@ -295,19 +333,21 @@ unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
-void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
+
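Following the field layout in the comment above, an immediate of 0xf7f decodes to vmcnt = 15, expcnt = 7 and lgkmcnt = 15, every counter at its pre-gfx9 maximum. A standalone sketch of that arithmetic (gfx9's extra vmcnt bits in [15:14] are left out):

    // Pre-gfx9 layout only: vmcnt in [3:0], expcnt in [6:4], lgkmcnt in [11:8].
    constexpr unsigned Encoded = 0xf7f;
    constexpr unsigned Vmcnt   = Encoded & 0xf;        // 15
    constexpr unsigned Expcnt  = (Encoded >> 4) & 0x7; // 7
    constexpr unsigned Lgkmcnt = (Encoded >> 8) & 0xf; // 15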
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt);
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
@@ -322,9 +362,11 @@ unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
-unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
+
unsigned getInitialPSInputAddr(const Function &F);
LLVM_READNONE
@@ -349,6 +391,7 @@ inline bool isKernel(CallingConv::ID CC) {
}
bool hasXNACK(const MCSubtargetInfo &STI);
+bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
@@ -447,6 +490,9 @@ int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ const GCNSubtarget *Subtarget, uint32_t Align = 4);
+
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp
deleted file mode 100644
index 1924f71f11c8..000000000000
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//===-- AMDGPULaneDominator.cpp - Determine Lane Dominators ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// MBB A lane-dominates MBB B if
-// 1. A dominates B in the usual sense, i.e. every path from the entry to B
-// goes through A, and
-// 2. whenever B executes, every active lane during that execution of B was
-// also active during the most recent execution of A.
-//
-// The simplest example where A dominates B but does not lane-dominate it is
-// where A is a loop:
-//
-// |
-// +--+
-// A |
-// +--+
-// |
-// B
-//
-// Unfortunately, the second condition is not fully captured by the control
-// flow graph when it is unstructured (as may happen when branch conditions are
-// uniform).
-//
-// The following replacement of the second condition is a conservative
-// approximation. It is an equivalent condition when the CFG is fully
-// structured:
-//
-// 2'. every cycle in the CFG that contains A also contains B.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPULaneDominator.h"
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-
-namespace llvm {
-
-namespace AMDGPU {
-
-// Given machine basic blocks A and B where A dominates B, check whether
-// A lane-dominates B.
-//
-// The check is conservative, i.e. there can be false-negatives.
-bool laneDominates(MachineBasicBlock *A, MachineBasicBlock *B) {
- // Check whether A is reachable from itself without going through B.
- DenseSet<MachineBasicBlock *> Reachable;
- SmallVector<MachineBasicBlock *, 8> Stack;
-
- Stack.push_back(A);
- do {
- MachineBasicBlock *MBB = Stack.back();
- Stack.pop_back();
-
- for (MachineBasicBlock *Succ : MBB->successors()) {
- if (Succ == A)
- return false;
- if (Succ != B && Reachable.insert(Succ).second)
- Stack.push_back(Succ);
- }
- } while (!Stack.empty());
-
- return true;
-}
-
-} // namespace AMDGPU
-
-} // namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h
deleted file mode 100644
index 4f33a89a364b..000000000000
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//===- AMDGPULaneDominator.h ------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEDOMINATOR_H
-#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEDOMINATOR_H
-
-namespace llvm {
-
-class MachineBasicBlock;
-
-namespace AMDGPU {
-
-bool laneDominates(MachineBasicBlock *MBBA, MachineBasicBlock *MBBB);
-
-} // end namespace AMDGPU
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEDOMINATOR_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 9f0a4d29b5e4..82ffdef8e674 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -46,6 +46,7 @@
int64_t Value = 0; \
if (!expectAbsExpression(MCParser, Value, Err)) \
return false; \
+ C.compute_pgm_resource_registers &= ~(SetMacro(0xFFFFFFFFFFFFFFFFULL) << Shift); \
C.compute_pgm_resource_registers |= SetMacro(Value) << Shift; \
return true; \
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 4c7a92219755..68446ab79720 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -84,6 +84,10 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP1";
}
+class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_DPP_Pseudo <OpName, P, pattern> {
+}
+
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret =
!if(P.HasModifiers,
@@ -103,6 +107,8 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _e32 : VOP1_Pseudo <opName, P>;
def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP1_DPP_Pseudo <opName, P>;
}
// Special profile for instructions which have clamp
@@ -173,7 +179,9 @@ defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
+let FPDPRounding = 1 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
+} // End FPDPRounding = 1
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
@@ -226,7 +234,9 @@ defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;
let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
+let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
+} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]
defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
@@ -242,7 +252,9 @@ def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC64 = VRegSrc_32;
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
}
// Special case because there are no true output operands. Hack vdst
@@ -271,7 +283,10 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
+
let HasDst = 0;
let EmitDst = 1; // force vdst emission
}
@@ -328,8 +343,10 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
let SubtargetPredicate = Has16BitInsts in {
+let FPDPRounding = 1 in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
+} // End FPDPRounding = 1
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
let SchedRW = [WriteQuarterRate32] in {
@@ -347,7 +364,9 @@ defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
+let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
+} // End FPDPRounding = 1
}
@@ -495,13 +514,8 @@ defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>;
// VI
//===----------------------------------------------------------------------===//
-class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
- VOP_DPP <ps.OpName, P> {
- let Defs = ps.Defs;
- let Uses = ps.Uses;
- let SchedRW = ps.SchedRW;
- let hasSideEffects = ps.hasSideEffects;
-
+class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPPe <P> {
bits<8> vdst;
let Inst{8-0} = 0xfa; // dpp
let Inst{16-9} = op;
@@ -539,9 +553,10 @@ multiclass VOP1_Real_vi <bits<10> op> {
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}
defm V_NOP : VOP1_Real_vi <0x0>;
@@ -712,9 +727,11 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
+
}
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 5ec1a15c5cd2..e3fd7b5f9fad 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -105,6 +105,11 @@ class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP2";
}
+class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_DPP_Pseudo <OpName, P, pattern> {
+}
+
+
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst,
@@ -116,22 +121,49 @@ class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}
-multiclass VOP2Inst <string opName,
- VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName,
- bit GFX9Renamed = 0> {
-
+multiclass VOP2Inst_e32<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> {
let renamedInGFX9 = GFX9Renamed in {
-
- def _e32 : VOP2_Pseudo <opName, P>,
+ def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ } // End renamedInGFX9 = GFX9Renamed
+}
+multiclass VOP2Inst_e64<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> {
+ let renamedInGFX9 = GFX9Renamed in {
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+ } // End renamedInGFX9 = GFX9Renamed
+}
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+multiclass VOP2Inst_sdwa<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> {
+ let renamedInGFX9 = GFX9Renamed in {
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+ } // End renamedInGFX9 = GFX9Renamed
+}
+multiclass VOP2Inst<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> :
+ VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
+ VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
+ VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed> {
+ let renamedInGFX9 = GFX9Renamed in {
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
}
@@ -144,12 +176,14 @@ multiclass VOP2bInst <string opName,
let renamedInGFX9 = GFX9Renamed in {
let SchedRW = [Write32Bit, WriteSALU] in {
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
- def _e32 : VOP2_Pseudo <opName, P>,
+ def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -172,6 +206,9 @@ multiclass VOP2eInst <string opName,
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
+
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -211,9 +248,9 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
- let InsDPP = (ins DstRCDPP:$old,
- Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
@@ -230,21 +267,15 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret;
let HasSrc2 = 0;
let HasSrc2Mods = 0;
- let HasExt = 1;
- let HasSDWA9 = 0;
-}
-def VOP_MAC_F16 : VOP_MAC <f16> {
- // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
- // 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, f16>.ret;
+ let HasExt = 1;
+ let HasExtDPP = 1;
+ let HasExtSDWA = 1;
+ let HasExtSDWA9 = 0;
}
-def VOP_MAC_F32 : VOP_MAC <f32> {
- // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
- // 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, f32>.ret;
-}
+def VOP_MAC_F16 : VOP_MAC <f16>;
+def VOP_MAC_F32 : VOP_MAC <f32>;
// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
@@ -290,7 +321,9 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let HasExt = 1;
- let HasSDWA9 = 1;
+ let HasExtDPP = 1;
+ let HasExtSDWA = 1;
+ let HasExtSDWA9 = 1;
}
// Read in from vcc or arbitrary SGPR
@@ -321,7 +354,9 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let HasExt = 1;
- let HasSDWA9 = 1;
+ let HasExtDPP = 1;
+ let HasExtSDWA = 1;
+ let HasExtSDWA9 = 1;
}
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
@@ -331,8 +366,11 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
+
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
}
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
@@ -342,20 +380,23 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
- let HasExt = 0;
- let HasSDWA9 = 0;
let HasSrc2 = 0;
let HasSrc2Mods = 0;
+
+ let HasExt = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
}
//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isGCN in {
+let SubtargetPredicate = isGCN, Predicates = [isGCN] in {
defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
-def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, [], "">;
+def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
@@ -363,29 +404,29 @@ defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
-defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
-defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
-defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
-defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
-defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum>;
-defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum>;
-defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_I32_I32_I32>;
-defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_I32_I32_I32>;
-defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_I32_I32_I32>;
-defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_I32_I32_I32>;
+defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_i24>;
+defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
+defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_u24>;
+defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
+defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
+defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
+defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
+defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
+defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
+defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
-defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_I32_I32_I32>;
-defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_I32_I32_I32>;
-defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_I32_I32_I32>;
+defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
+defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
+defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
}
-def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, [], "">;
+def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
@@ -411,11 +452,11 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub
// These are special and do not read the exec mask.
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
- [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))], "">;
+ [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
- [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))], "">;
+ [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1
@@ -425,13 +466,13 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
-defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_i16_f32>;
-defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_u16_f32>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpkrtz_f16_f32>;
-defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_u16_u32>;
-defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_i16_i32>;
+defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
+defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
+defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
+defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;
-} // End SubtargetPredicate = isGCN
+} // End SubtargetPredicate = isGCN, Predicates = [isGCN]
def : GCNPat<
(AMDGPUadde i32:$src0, i32:$src1, i1:$src2),
@@ -444,40 +485,99 @@ def : GCNPat<
>;
// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
+let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
-defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
-defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>;
} // End isCommutable = 1
-} // End let SubtargetPredicate = SICI
+} // End let SubtargetPredicate = SICI, Predicates = [isSICI]
+
+class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
+ GCNPat<
+ (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
+ !if(!cast<Commutable_REV>(Inst).IsOrig,
+ (Inst $src0, $src1),
+ (Inst $src1, $src0)
+ )
+ >;
+
+let AddedComplexity = 1 in {
+ def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
+ def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
+ def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;
+}
+
+let SubtargetPredicate = HasAddNoCarryInsts in {
+ def : DivergentBinOp<add, V_ADD_U32_e32>;
+ def : DivergentBinOp<sub, V_SUB_U32_e32>;
+ def : DivergentBinOp<sub, V_SUBREV_U32_e32>;
+}
+
+
+def : DivergentBinOp<add, V_ADD_I32_e32>;
+
+def : DivergentBinOp<add, V_ADD_I32_e64>;
+def : DivergentBinOp<sub, V_SUB_I32_e32>;
+
+def : DivergentBinOp<sub, V_SUBREV_I32_e32>;
+
+def : DivergentBinOp<srl, V_LSHRREV_B32_e32>;
+def : DivergentBinOp<sra, V_ASHRREV_I32_e32>;
+def : DivergentBinOp<shl, V_LSHLREV_B32_e32>;
+def : DivergentBinOp<adde, V_ADDC_U32_e32>;
+def : DivergentBinOp<sube, V_SUBB_U32_e32>;
+
+class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
+ GCNPat<
+ (getDivergentFrag<Op>.ret i64:$src0, i64:$src1),
+ (REG_SEQUENCE VReg_64,
+ (Inst
+ (i32 (EXTRACT_SUBREG $src0, sub0)),
+ (i32 (EXTRACT_SUBREG $src1, sub0))
+ ), sub0,
+ (Inst
+ (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG $src1, sub1))
+ ), sub1
+ )
+ >;
+
+def : divergent_i64_BinOp <and, V_AND_B32_e32>;
+def : divergent_i64_BinOp <or, V_OR_B32_e32>;
+def : divergent_i64_BinOp <xor, V_XOR_B32_e32>;
let SubtargetPredicate = Has16BitInsts in {
+let FPDPRounding = 1 in {
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
+defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
+} // End FPDPRounding = 1
+
defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>;
-defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
let isCommutable = 1 in {
+let FPDPRounding = 1 in {
defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>;
defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
+} // End FPDPRounding = 1
defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
-defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum>;
-defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum>;
+defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
+defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
@@ -698,13 +798,8 @@ defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>;
// VI
//===----------------------------------------------------------------------===//
-class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfile P = ps.Pfl> :
- VOP_DPP <OpName, P> {
- let Defs = ps.Defs;
- let Uses = ps.Uses;
- let SchedRW = ps.SchedRW;
- let hasSideEffects = ps.hasSideEffects;
-
+class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPPe <P> {
bits<8> vdst;
bits<8> src1;
let Inst{8-0} = 0xfa; //dpp
@@ -716,12 +811,6 @@ class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfil
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
-multiclass VOP32_Real_vi <bits<10> op> {
- def _vi :
- VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3e_vi<op, !cast<VOP2_Pseudo>(NAME).Pfl>;
-}
-
multiclass VOP2_Real_MADK_vi <bits<6> op> {
def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
@@ -791,8 +880,13 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
}
- def _dpp :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(OpName#"_e32"), AsmName>;
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
+ VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
+ let AsmString = AsmName # ps.AsmOperands;
+ }
}
}
@@ -819,10 +913,14 @@ multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
}
- def _dpp_gfx9 :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(OpName#"_e32"), AsmName> {
- let DecoderNamespace = "SDWA9";
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
+ VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
+ let AsmString = AsmName # ps.AsmOperands;
+ let DecoderNamespace = "SDWA9";
+ }
}
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
@@ -840,19 +938,23 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
}
- def _dpp_gfx9 :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
- let DecoderNamespace = "SDWA9";
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
+ let DecoderNamespace = "SDWA9";
+ }
}
} // AssemblerPredicates = [isGFX9]
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}
defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>;
@@ -899,9 +1001,6 @@ defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
-defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
-defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
-
defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>;
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 26bc5260e17f..4b8c1f208a0e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -17,16 +17,16 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT src0),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node (P.Src0VT src0),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT src0)))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0)))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -35,18 +35,18 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
(P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -55,18 +55,18 @@ class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
(P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -75,18 +75,18 @@ class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
(P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -94,9 +94,9 @@ class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
}
class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
- list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
- list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
- list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0))];
+ list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
+ list<dag> ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1))];
+ list<dag> ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
ret1));
@@ -185,6 +185,7 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
getAsm64<HasDst, NumSrcArgs, HasIntClamp,
HasModifiers, HasOMod, DstVT>.ret,
P.Asm64));
+ let NeedPatGen = P.NeedPatGen;
}
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
@@ -219,7 +220,8 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
// VOP3 INTERP
//===----------------------------------------------------------------------===//
-class VOP3Interp<string OpName, VOPProfile P> : VOP3_Pseudo<OpName, P> {
+class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
+ VOP3_Pseudo<OpName, P, pattern> {
let AsmMatchConverter = "cvtVOP3Interp";
}
@@ -291,11 +293,13 @@ def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fma>;
def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
let SchedRW = [WriteDoubleAdd] in {
+let FPDPRounding = 1 in {
def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, fma>;
def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, fadd, 1>;
def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>;
-def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum, 1>;
-def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum, 1>;
+} // End FPDPRounding = 1
+def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like, 1>;
+def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like, 1>;
} // End SchedRW = [WriteDoubleAdd]
let SchedRW = [WriteQuarterRate32] in {
@@ -323,6 +327,7 @@ def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
getVOP3VCC<VOP_F64_F64_F64_F64_VCC, AMDGPUdiv_fmas>.ret> {
let SchedRW = [WriteDouble];
+ let FPDPRounding = 1;
}
} // End Uses = [VCC, EXEC]
@@ -353,10 +358,10 @@ def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CL
def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUdiv_fixup>;
-let SchedRW = [WriteDoubleAdd] in {
+let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
-} // End SchedRW = [WriteDoubleAdd]
+} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
@@ -367,6 +372,7 @@ def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32,
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
let SchedRW = [WriteDouble, WriteSALU];
let AsmMatchConverter = "";
+ let FPDPRounding = 1;
}
def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
@@ -381,12 +387,12 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3
let SchedRW = [Write64Bit] in {
// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>>;
-def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>>;
-def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>>;
+let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
+def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, shl>;
+def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, srl>;
+def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, sra>;
def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-} // End SubtargetPredicate = isSICI
+} // End SubtargetPredicate = isSICI, Predicates = [isSICI]
let SubtargetPredicate = isVI in {
def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
@@ -395,6 +401,22 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
} // End SubtargetPredicate = isVI
} // End SchedRW = [Write64Bit]
+let Predicates = [isVI] in {
+def : GCNPat <
+ (getDivergentFrag<shl>.ret i64:$x, i32:$y),
+ (V_LSHLREV_B64 $y, $x)
+>;
+def : AMDGPUPat <
+ (getDivergentFrag<srl>.ret i64:$x, i32:$y),
+ (V_LSHRREV_B64 $y, $x)
+>;
+def : AMDGPUPat <
+ (getDivergentFrag<sra>.ret i64:$x, i32:$y),
+ (V_ASHRREV_I64 $y, $x)
+>;
+}
+
+
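The three patterns just added fire only for divergent 64-bit shifts on VI: getDivergentFrag wraps the generic shl/srl/sra nodes in an N->isDivergent() predicate, and the operands are swapped because the *REV instructions take the shift amount first. A minimal C++ sketch of the kind of code this is aimed at; the function and the claim that the shift amount is lane-dependent are illustrative assumptions, not something stated by the patch:

    #include <cstdint>

    // Sketch: a 64-bit shift whose amount varies per lane is divergent, so the
    // (getDivergentFrag<shl>.ret i64:$x, i32:$y) pattern would select
    // V_LSHLREV_B64 with the operands reversed (shift amount first).
    uint64_t shl64_divergent(uint64_t x, uint32_t lane_dependent_amount) {
      return x << lane_dependent_amount;
    }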
let SubtargetPredicate = isCIVI in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
@@ -414,33 +436,51 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup> {
let Predicates = [Has16BitInsts, isVIOnly];
+ let FPDPRounding = 1;
}
def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup> {
let renamedInGFX9 = 1;
let Predicates = [Has16BitInsts, isGFX9];
+ let FPDPRounding = 1;
+}
+
+def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma> {
+ let Predicates = [Has16BitInsts, isVIOnly];
+ let FPDPRounding = 1;
+}
+def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, fma> {
+ let renamedInGFX9 = 1;
+ let Predicates = [Has16BitInsts, isGFX9];
+ let FPDPRounding = 1;
}
let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {
let renamedInGFX9 = 1 in {
-def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
-def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
+let FPDPRounding = 1 in {
+def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
+let Uses = [M0, EXEC] in {
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>;
-}
+} // End Uses = [M0, EXEC]
+} // End FPDPRounding = 1
+} // End renamedInGFX9 = 1
let SubtargetPredicate = isGFX9 in {
-def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
+def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>> {
+ let FPDPRounding = 1;
+}
def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
-def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
} // End SubtargetPredicate = isGFX9
+let Uses = [M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>>;
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
+} // End Uses = [M0, EXEC], FPDPRounding = 1
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
@@ -468,6 +508,37 @@ defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
} // End Predicates = [Has16BitInsts]
+class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
+ (ops node:$x, node:$y, node:$z),
+ // When the inner operation is used multiple times, selecting 3-op
+ // instructions may still be beneficial -- if the other users can be
+ // combined similarly. Let's be conservative for now.
+ (op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z),
+ [{
+ // Only use VALU ops when the result is divergent.
+ if (!N->isDivergent())
+ return false;
+
+ // Check constant bus limitations.
+ //
+ // Note: Use !isDivergent as a conservative proxy for whether the value
+ // is in an SGPR (uniform values can end up in VGPRs as well).
+ unsigned ConstantBusUses = 0;
+ for (unsigned i = 0; i < 3; ++i) {
+ if (!Operands[i]->isDivergent() &&
+ !isInlineImmediate(Operands[i].getNode())) {
+ ConstantBusUses++;
+ if (ConstantBusUses >= 2)
+ return false;
+ }
+ }
+
+ return true;
+ }]
+> {
+ let PredicateCodeUsesOperands = 1;
+}
+
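The C++ predicate inside ThreeOpFrag imposes two conditions before the later three-operand patterns may fire: the combined node must be divergent, and at most one of the three operands may occupy the constant bus. A standalone restatement of that second check as plain C++, under the same conservative assumption the comment spells out (a non-divergent operand that is not an inline immediate is counted as a constant-bus use); the helper name is invented for illustration:

    // Hypothetical restatement of ThreeOpFrag's constant-bus check: reject the
    // fold as soon as a second would-be SGPR/literal operand is seen.
    bool fitsSingleConstantBusUse(const bool divergent[3], const bool inlineImm[3]) {
      unsigned constantBusUses = 0;
      for (unsigned i = 0; i < 3; ++i) {
        if (!divergent[i] && !inlineImm[i]) {
          ++constantBusUses;
          if (constantBusUses >= 2)
            return false;
        }
      }
      return true;
    }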
let SubtargetPredicate = isGFX9 in {
def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -502,6 +573,22 @@ def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B3
def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
+
+
+class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
+ // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
+ (ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
+ (inst i32:$src0, i32:$src1, i32:$src2)
+>;
+
+def : ThreeOp_i32_Pats<shl, add, V_LSHL_ADD_U32>;
+def : ThreeOp_i32_Pats<add, shl, V_ADD_LSHL_U32>;
+def : ThreeOp_i32_Pats<add, add, V_ADD3_U32>;
+def : ThreeOp_i32_Pats<shl, or, V_LSHL_OR_B32>;
+def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32>;
+def : ThreeOp_i32_Pats<or, or, V_OR3_B32>;
+def : ThreeOp_i32_Pats<xor, add, V_XAD_U32>;
+
} // End SubtargetPredicate = isGFX9
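Each ThreeOp_i32_Pats instantiation above folds a two-instruction integer sequence into a single GFX9 VOP3 op whenever the ThreeOpFrag conditions hold. A rough C++ sketch of the source-level shapes some of the entries correspond to (illustrative only; modifiers and commutations are ignored):

    #include <cstdint>

    uint32_t lshl_add(uint32_t x, uint32_t y, uint32_t z) { return (x << y) + z; } // V_LSHL_ADD_U32
    uint32_t add_lshl(uint32_t x, uint32_t y, uint32_t z) { return (x + y) << z; } // V_ADD_LSHL_U32
    uint32_t and_or (uint32_t x, uint32_t y, uint32_t z)  { return (x & y) | z; }  // V_AND_OR_B32
    uint32_t xad    (uint32_t x, uint32_t y, uint32_t z)  { return (x ^ y) + z; }  // V_XAD_U32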
//===----------------------------------------------------------------------===//
@@ -651,23 +738,23 @@ defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>;
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
multiclass VOP3_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3e_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3be_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3be_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3Interp_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
@@ -775,12 +862,15 @@ defm V_FMA_F16 : VOP3_F16_Real_vi <0x1ee>;
defm V_DIV_FIXUP_F16 : VOP3_F16_Real_vi <0x1ef>;
defm V_INTERP_P2_F16 : VOP3Interp_F16_Real_vi <0x276>;
+let FPDPRounding = 1 in {
defm V_MAD_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ea, "V_MAD_F16", "v_mad_legacy_f16">;
-defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
-defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;
defm V_FMA_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ee, "V_FMA_F16", "v_fma_legacy_f16">;
defm V_DIV_FIXUP_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ef, "V_DIV_FIXUP_F16", "v_div_fixup_legacy_f16">;
defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16", "v_interp_p2_legacy_f16">;
+} // End FPDPRounding = 1
+
+defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
+defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;
defm V_MAD_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">;
defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
@@ -813,6 +903,9 @@ defm V_MUL_LO_I32 : VOP3_Real_vi <0x285>;
defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>;
defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>;
+defm V_READLANE_B32 : VOP3_Real_vi <0x289>;
+defm V_WRITELANE_B32 : VOP3_Real_vi <0x28a>;
+
defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index b51828b54679..91b45583c848 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -42,14 +42,16 @@ class VOP3_VOP3PInst<string OpName, VOPProfile P, bit UseTiedOutput = 0,
}
let isCommutable = 1 in {
-def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
def V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
def V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
+let FPDPRounding = 1 in {
+def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fadd>;
def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmul>;
-def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum>;
-def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum>;
+} // End FPDPRounding = 1
+def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
+def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;
def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, add>;
def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
@@ -137,12 +139,14 @@ let SubtargetPredicate = HasMadMixInsts in {
let isCommutable = 1 in {
def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
+let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
let ClampLo = 0, ClampHi = 1 in {
def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
}
+} // End FPDPRounding = 1
}
defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
@@ -154,18 +158,99 @@ let SubtargetPredicate = HasFmaMixInsts in {
let isCommutable = 1 in {
def V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
+let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
def V_FMA_MIXLO_F16 : VOP3_VOP3PInst<"v_fma_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
let ClampLo = 0, ClampHi = 1 in {
def V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
}
+} // End FPDPRounding = 1
}
defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}
-let SubtargetPredicate = HasDLInsts in {
+// Defines patterns that extract a signed 4-bit field starting at bit Idx[0].
+foreach Idx = [[0,28],[4,24],[8,20],[12,16],[16,12],[20,8],[24,4]] in
+ def ExtractSigned4bit_#Idx[0] : PatFrag<(ops node:$src),
+ (sra (shl node:$src, (i32 Idx[1])), (i32 28))>;
+
+// Defines a pattern that extracts an unsigned (U = 1) or signed (U = 0)
+// 4/8-bit field starting at FromBitIndex.
+class Extract<int FromBitIndex, int BitMask, bit U>: PatFrag<
+ (ops node:$src),
+ !if (!or (!and (!eq (BitMask, 255), !eq (FromBitIndex, 24)), !eq (FromBitIndex, 28)), // last element
+ !if (U, (srl node:$src, (i32 FromBitIndex)), (sra node:$src, (i32 FromBitIndex))),
+ !if (!eq (FromBitIndex, 0), // first element
+ !if (U, (and node:$src, (i32 BitMask)),
+ !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
+ (sext_inreg node:$src, i8))),
+ !if (U, (and (srl node:$src, (i32 FromBitIndex)), (i32 BitMask)),
+ !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
+ (sext_inreg (srl node:$src, (i32 FromBitIndex)), i8)))))>;
+
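The Extract fragment is a parameterized description of pulling a 4- or 8-bit field out of a 32-bit scalar, with the first and last fields special-cased so the DAG matches what the combiner typically produces. Stripped of those special cases, the unsigned and signed 8-bit variants amount to the following sketch:

    #include <cstdint>

    // U = 1: zero-extended field, i.e. (and (srl v, from), 255).
    uint32_t extract_u8(uint32_t v, unsigned fromBit) {
      return (v >> fromBit) & 0xff;
    }
    // U = 0: sign-extended field, i.e. (sext_inreg (srl v, from), i8).
    int32_t extract_s8(uint32_t v, unsigned fromBit) {
      return static_cast<int8_t>(v >> fromBit);
    }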
+
+foreach Type = ["I", "U"] in
+ foreach Index = 0-3 in {
+    // Defines patterns that extract the Index'th 8-bit field from an unsigned
+    // 32-bit scalar value.
+ def #Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !if (!eq (Type, "U"), 1, 0)>;
+
+    // Defines multiplication patterns where the multiplication operates on the
+    // Index'th 8-bit field of a 32-bit scalar value.
+
+ def Mul#Type#_Elt#Index : PatFrag<
+ (ops node:$src0, node:$src1),
+ (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), AMDGPUmul_i24_oneuse, AMDGPUmul_u24_oneuse))
+ (!cast<Extract>(#Type#Index#"_8bit") node:$src0),
+ (!cast<Extract>(#Type#Index#"_8bit") node:$src1))>;
+ }
+
+// The many variants of the dot8 patterns cause a huge increase in compile time.
+// Define add/mul nodes that are neither associative nor commutative to prevent
+// permutation in the dot8 pattern.
+def NonACAdd : SDNode<"ISD::ADD" , SDTIntBinOp>;
+def NonACAdd_oneuse : HasOneUseBinOp<NonACAdd>;
+
+def NonACAMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24" , SDTIntBinOp>;
+def NonACAMDGPUmul_u24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_u24>;
+
+def NonACAMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24" , SDTIntBinOp>;
+def NonACAMDGPUmul_i24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_i24>;
+
+foreach Type = ["I", "U"] in
+ foreach Index = 0-7 in {
+    // Defines patterns that extract the Index'th 4-bit field from an unsigned
+    // 32-bit scalar value.
+ def #Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !if (!eq (Type, "U"), 1, 0)>;
+
+    // Defines multiplication patterns where the multiplication operates on the
+    // Index'th 4-bit field of a 32-bit scalar value.
+ def Mul#Type#Index#"_4bit" : PatFrag<
+ (ops node:$src0, node:$src1),
+ (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), NonACAMDGPUmul_i24_oneuse, NonACAMDGPUmul_u24_oneuse))
+ (!cast<Extract>(#Type#Index#"_4bit") node:$src0),
+ (!cast<Extract>(#Type#Index#"_4bit") node:$src1))>;
+ }
+
+class UDot2Pat<Instruction Inst> : GCNPat <
+ (add (add_oneuse (AMDGPUmul_u24_oneuse (srl i32:$src0, (i32 16)),
+ (srl i32:$src1, (i32 16))), i32:$src2),
+ (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
+ (and i32:$src1, (i32 65535)))
+ ),
+ (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
+>;
+
+class SDot2Pat<Instruction Inst> : GCNPat <
+ (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
+ (sra i32:$src1, (i32 16))), i32:$src2),
+ (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
+ (sext_inreg i32:$src1, i16))),
+ (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
+>;
+
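UDot2Pat and SDot2Pat recognize the already-expanded form of a two-element 16-bit dot product with accumulator and rewrite it to the corresponding dot instruction (the leading (i32 8) operands are the source modifier fields and are omitted from the sketch). In C++ terms the unsigned variant is roughly:

    #include <cstdint>

    // Multiply the low and high 16-bit halves of src0 and src1 and add both
    // products to the accumulator src2; selected as V_DOT2_U32_U16.
    uint32_t udot2(uint32_t src0, uint32_t src1, uint32_t src2) {
      return (src0 & 0xffff) * (src1 & 0xffff) +
             (src0 >> 16) * (src1 >> 16) + src2;
    }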
+let SubtargetPredicate = HasDotInsts in {
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
@@ -192,7 +277,32 @@ defm : DotPats<int_amdgcn_udot4, V_DOT4_U32_U8>;
defm : DotPats<int_amdgcn_sdot8, V_DOT8_I32_I4>;
defm : DotPats<int_amdgcn_udot8, V_DOT8_U32_U4>;
-} // End SubtargetPredicate = HasDLInsts
+def : UDot2Pat<V_DOT2_U32_U16>;
+def : SDot2Pat<V_DOT2_I32_I16>;
+
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
+ (add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
+ [1, 2, 3, 4, 5, 6, 7], lhs, y,
+ (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
+// The other variants of the dot8 codegen DAG patterns are not generated through
+// TableGen because of the huge increase in compile time. Directly handle the
+// pattern produced by the frontend here.
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
+ [7, 1, 2, 3, 4, 5, 6], lhs, y,
+ (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
+} // End SubtargetPredicate = HasDotInsts
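The !foldl-generated patterns extend the same idea to the 4x8-bit and 8x4-bit dot products by chaining one add per element; the second dot8 pattern merely matches a different association order produced by the frontend. A compact C++ sketch of the computations being recognized (modifier operands again omitted):

    #include <cstdint>

    uint32_t udot4(uint32_t src0, uint32_t src1, uint32_t src2) {
      uint32_t acc = src2;
      for (unsigned i = 0; i < 4; ++i)   // one 8-bit lane per step
        acc += ((src0 >> (8 * i)) & 0xff) * ((src1 >> (8 * i)) & 0xff);
      return acc;                        // selected as V_DOT4_U32_U8
    }

    uint32_t udot8(uint32_t src0, uint32_t src1, uint32_t src2) {
      uint32_t acc = src2;
      for (unsigned i = 0; i < 8; ++i)   // one 4-bit lane per step
        acc += ((src0 >> (4 * i)) & 0xf) * ((src1 >> (4 * i)) & 0xf);
      return acc;                        // selected as V_DOT8_U32_U4
    }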
multiclass VOP3P_Real_vi<bits<10> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -242,7 +352,7 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
}
-let SubtargetPredicate = HasDLInsts in {
+let SubtargetPredicate = HasDotInsts in {
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
@@ -252,4 +362,4 @@ defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>;
-} // End SubtargetPredicate = HasDLInsts
+} // End SubtargetPredicate = HasDotInsts
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index cc6b8116afee..091cac8cd35c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -635,6 +635,17 @@ def : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
def : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
def : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
+def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
+def : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
+def : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
+def : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>;
+def : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>;
+def : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>;
+def : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
+def : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
+def : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
+def : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
+
class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : GCNPat <
(i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
@@ -656,6 +667,14 @@ def : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
def : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
def : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;
+def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
+def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
+def : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
+def : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>;
+def : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>;
+def : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>;
+
+
def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
@@ -670,6 +689,13 @@ def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
+def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>;
+def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>;
+def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
+def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
+def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
+def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
+
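The new ICMP_Pattern and FCMP_Pattern instantiations extend the existing 32- and 64-bit compare selection to 16-bit integer and half-precision types, mapping them onto the e64 VOPC forms. A small illustrative sketch of compares that these entries cover; _Float16 availability depends on the compiler and target, so treat the second function as an assumption:

    #include <cstdint>

    bool lt_u16(uint16_t a, uint16_t b) { return a < b; }   // COND_ULT -> V_CMP_LT_U16_e64
    bool lt_f16(_Float16 a, _Float16 b) { return a < b; }   // COND_OLT -> V_CMP_LT_F16_e64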
//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
index f0f7f259f71d..7de7d90d27b3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -420,10 +420,10 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
let SDWA = 1;
let Uses = [EXEC];
- let SubtargetPredicate = !if(P.HasExt, HasSDWA, DisableInst);
- let AssemblerPredicate = !if(P.HasExt, HasSDWA, DisableInst);
- let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
- AMDGPUAsmVariants.Disable);
+ let SubtargetPredicate = !if(P.HasExtSDWA, HasSDWA, DisableInst);
+ let AssemblerPredicate = !if(P.HasExtSDWA, HasSDWA, DisableInst);
+ let AsmVariantName = !if(P.HasExtSDWA, AMDGPUAsmVariants.SDWA,
+ AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA";
VOPProfile Pfl = P;
@@ -471,10 +471,10 @@ class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
- let SubtargetPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst);
- let AssemblerPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst);
- let AsmVariantName = !if(ps.Pfl.HasSDWA9, AMDGPUAsmVariants.SDWA9,
- AMDGPUAsmVariants.Disable);
+ let SubtargetPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA9, DisableInst);
+ let AssemblerPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA9, DisableInst);
+ let AsmVariantName = !if(ps.Pfl.HasExtSDWA9, AMDGPUAsmVariants.SDWA9,
+ AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA9";
// Copy relevant pseudo op flags
@@ -505,9 +505,14 @@ class VOP_DPPe<VOPProfile P> : Enc64 {
let Inst{63-60} = row_mask;
}
-class VOP_DPP <string OpName, VOPProfile P> :
- InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, []>,
- VOP_DPPe<P> {
+class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, pattern>,
+ VOP <OpName>,
+ SIMCInstr <OpName#"_dpp", SIEncodingFamily.NONE>,
+ MnemonicAlias <OpName#"_dpp", OpName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
let mayLoad = 0;
let mayStore = 0;
@@ -517,15 +522,99 @@ class VOP_DPP <string OpName, VOPProfile P> :
let VALU = 1;
let DPP = 1;
let Size = 8;
+ let Uses = [EXEC];
+ let isConvergent = 1;
+
+ string Mnemonic = OpName;
+ string AsmOperands = P.AsmDPP;
let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
let SubtargetPredicate = HasDPP;
- let AssemblerPredicate = !if(P.HasExt, HasDPP, DisableInst);
- let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
- AMDGPUAsmVariants.Disable);
+ let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst);
+ let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
+ AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, "$old = $vdst", "");
let DisableEncoding = !if(P.NumSrcArgs, "$old", "");
let DecoderNamespace = "DPP";
+
+ VOPProfile Pfl = P;
+}
+
+class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, EncodingFamily> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ // Copy relevant pseudo op flags
+ let isConvergent = ps.isConvergent;
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AssemblerPredicate = ps.AssemblerPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let DecoderNamespace = ps.DecoderNamespace;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+}
+
+class getNumNodeArgs<SDPatternOperator Op> {
+ SDNode N = !cast<SDNode>(Op);
+ SDTypeProfile TP = N.TypeProfile;
+ int ret = TP.NumOperands;
+}
+
+
+class getDivergentFrag<SDPatternOperator Op> {
+
+ int NumSrcArgs = getNumNodeArgs<Op>.ret;
+ PatFrag ret = PatFrag <
+ !if(!eq(NumSrcArgs, 1),
+ (ops node:$src0),
+ !if(!eq(NumSrcArgs, 2),
+ (ops node:$src0, node:$src1),
+ (ops node:$src0, node:$src1, node:$src2))),
+ !if(!eq(NumSrcArgs, 1),
+ (Op $src0),
+ !if(!eq(NumSrcArgs, 2),
+ (Op $src0, $src1),
+ (Op $src0, $src1, $src2))),
+ [{ return N->isDivergent(); }]
+ >;
+}
+
+class VOPPatGen<SDPatternOperator Op, VOPProfile P> {
+
+ PatFrag Operator = getDivergentFrag < Op >.ret;
+
+ dag Ins = !foreach(tmp, P.Ins32, !subst(ins, Operator,
+ !subst(P.Src0RC32, P.Src0VT,
+ !subst(P.Src1RC32, P.Src1VT, tmp))));
+
+
+ dag Outs = !foreach(tmp, P.Outs32, !subst(outs, set,
+ !subst(P.DstRC, P.DstVT, tmp)));
+
+ list<dag> ret = [!con(Outs, (set Ins))];
+}
+
+class VOPPatOrNull<SDPatternOperator Op, VOPProfile P> {
+ list<dag> ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen<Op, P>.ret, []);
+}
+
+class DivergentFragOrOp<SDPatternOperator Op, VOPProfile P> {
+ SDPatternOperator ret = !if(!eq(P.NeedPatGen,PatGenMode.Pattern),
+ !if(!isa<SDNode>(Op), getDivergentFrag<Op>.ret, Op), Op);
}
include "VOPCInstructions.td"
diff --git a/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp
index 1acae3a88870..6f5bbd3b4ef3 100644
--- a/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp
@@ -26,12 +26,6 @@ static Reloc::Model getRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
/// ARCTargetMachine ctor - Create an ILP32 architecture model
ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -43,7 +37,7 @@ ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT,
"e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-"
"f32:32:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getRelocModel(RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp b/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
index 0c627d04698b..9c820c2fc595 100644
--- a/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
@@ -20,7 +20,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -29,6 +28,12 @@ using namespace llvm;
#include "ARCGenAsmWriter.inc"
+template <class T>
+static const char *BadConditionCode(T cc) {
+ LLVM_DEBUG(dbgs() << "Unknown condition code passed: " << cc << "\n");
+ return "{unknown-cc}";
+}
+
static const char *ARCBRCondCodeToString(ARCCC::BRCondCode BRCC) {
switch (BRCC) {
case ARCCC::BREQ:
@@ -44,7 +49,7 @@ static const char *ARCBRCondCodeToString(ARCCC::BRCondCode BRCC) {
case ARCCC::BRHS:
return "hs";
}
- llvm_unreachable("Unhandled ARCCC::BRCondCode");
+ return BadConditionCode(BRCC);
}
static const char *ARCCondCodeToString(ARCCC::CondCode CC) {
@@ -86,7 +91,7 @@ static const char *ARCCondCodeToString(ARCCC::CondCode CC) {
case ARCCC::Z:
return "z";
}
- llvm_unreachable("Unhandled ARCCC::CondCode");
+ return BadConditionCode(CC);
}
void ARCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 2e62a0790418..3db60f1c16d6 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -61,6 +61,11 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"floating point",
[FeatureFPARMv8]>;
+def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
+ "Enable full half-precision "
+ "floating point fml instructions",
+ [FeatureFullFP16]>;
+
def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
"Floating point unit supports "
"single precision only">;
@@ -194,6 +199,10 @@ def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
"SlowLoadDSubregister", "true",
"Loading into D subregs is slow">;
+def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp",
+ "UseWideStrideVFP", "true",
+ "Use a wide stride when allocating VFP registers">;
+
// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
"DontWidenVMOVS", "true",
@@ -256,6 +265,9 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
+def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopAlignment","2",
+ "Prefer 32-bit alignment for loops">;
+
/// Some instructions update CPSR partially, which can add false dependency for
/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
/// mapped to a separate physical register. Avoid partial CPSR update for these
@@ -351,6 +363,11 @@ def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
+// Armv8.5-A extensions
+
+def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
+ "Enable v8.5a Speculation Barrier" >;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
@@ -440,6 +457,10 @@ def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
"Support ARM v8.4a instructions",
[HasV8_3aOps, FeatureDotProd]>;
+def HasV8_5aOps : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true",
+ "Support ARM v8.5a instructions",
+ [HasV8_4aOps, FeatureSB]>;
+
//===----------------------------------------------------------------------===//
// ARM Processor subtarget features.
//
@@ -482,8 +503,25 @@ def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors", []>;
-def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
- "Samsung Exynos-Mx processors", []>;
+def ProcExynos : SubtargetFeature<"exynos", "ARMProcFamily", "Exynos",
+ "Samsung Exynos processors",
+ [FeatureZCZeroing,
+ FeatureUseWideStrideVFP,
+ FeatureUseAA,
+ FeatureSplatVFPToNeon,
+ FeatureSlowVGETLNi32,
+ FeatureSlowVDUP32,
+ FeatureSlowFPBrcc,
+ FeatureProfUnpredicate,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureHasRetAddrStack,
+ FeatureFuseLiterals,
+ FeatureFuseAES,
+ FeatureExpandMLx,
+ FeatureCrypto,
+ FeatureCRC]>;
def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
"Cortex-R4 ARM processors", []>;
@@ -659,6 +697,20 @@ def ARMv84a : Architecture<"armv8.4-a", "ARMv84a", [HasV8_4aOps,
FeatureRAS,
FeatureDotProd]>;
+def ARMv85a : Architecture<"armv8.5-a", "ARMv85a", [HasV8_5aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
+
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
FeatureDB,
@@ -865,6 +917,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
FeatureVFP4,
+ FeatureUseWideStrideVFP,
FeatureMP,
FeatureHWDivThumb,
FeatureHWDivARM,
@@ -926,6 +979,7 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
ProcM3,
+ FeaturePrefLoopAlign32,
FeatureHasNoBranchPredictor]>;
def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
@@ -936,6 +990,8 @@ def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em,
FeatureVFP4,
FeatureVFPOnlySP,
FeatureD16,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-m7", [ARMv7em,
@@ -950,6 +1006,8 @@ def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline,
FeatureFPARMv8,
FeatureD16,
FeatureVFPOnlySP,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
@@ -985,7 +1043,7 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
FeatureAvoidPartialCPSR,
FeatureCheapPredicableCPSR]>;
-def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
+def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72,
FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
@@ -1017,29 +1075,12 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureZCZeroing,
FeatureNoPostRASched]>;
-def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
-
-def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
-
-def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
-
-def : ProcNoItin<"exynos-m4", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
+def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynos]>;
+def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynos]>;
+def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynos]>;
+def : ProcNoItin<"exynos-m4", [ARMv82a, ProcExynos,
+ FeatureFullFP16,
+ FeatureDotProd]>;
def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
FeatureHWDivThumb,
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index b227eaed8d61..b7cd3a0c2dae 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -367,6 +367,18 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
unsigned RC;
+ bool FirstHalf;
+ const ARMBaseTargetMachine &ATM =
+ static_cast<const ARMBaseTargetMachine &>(TM);
+
+      // 'Q' should correspond to the low-order register and 'R' to the
+      // high-order register. Whether that is the upper or the lower half of
+      // the value depends on the endianness mode.
+ if (ExtraCode[0] == 'Q')
+ FirstHalf = ATM.isLittleEndian();
+ else
+ // ExtraCode[0] == 'R'.
+ FirstHalf = !ATM.isLittleEndian();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
if (InlineAsm::hasRegClassConstraint(Flags, RC) &&
ARM::GPRPairRegClass.hasSubClassEq(TRI->getRegClass(RC))) {
@@ -376,14 +388,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (!MO.isReg())
return true;
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ?
+ unsigned Reg = TRI->getSubReg(MO.getReg(), FirstHalf ?
ARM::gsub_0 : ARM::gsub_1);
O << ARMInstPrinter::getRegisterName(Reg);
return false;
}
if (NumVals != 2)
return true;
- unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1;
+ unsigned RegOp = FirstHalf ? OpNum : OpNum + 1;
if (RegOp >= MI->getNumOperands())
return true;
const MachineOperand &MO = MI->getOperand(RegOp);
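This hunk makes the 'Q' and 'R' inline-assembly operand modifiers select the sub-register by significance instead of by position, so big-endian targets now get the correct half of a 64-bit operand. A minimal GNU-style usage sketch for 32-bit ARM, where a 64-bit "r" operand occupies a register pair; the expected behaviour (Q = low-order half, R = high-order half) follows the comment above, and the snippet is illustrative and only assembles for ARM:

    #include <cstdint>

    // Copy the two halves of a 64-bit value out through the Q/R modifiers.
    void split64(uint64_t v, uint32_t &lo, uint32_t &hi) {
      uint32_t l, h;
      __asm__("mov %0, %Q2\n\t"   // %Q2: low-order register of the pair
              "mov %1, %R2"       // %R2: high-order register of the pair
              : "=&r"(l), "=&r"(h)
              : "r"(v));
      lo = l;
      hi = h;
    }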
@@ -815,15 +827,31 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
- bool IsIndirect = (TargetFlags & ARMII::MO_DLLIMPORT);
+ bool IsIndirect =
+ (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB));
if (!IsIndirect)
return getSymbol(GV);
SmallString<128> Name;
- Name = "__imp_";
+ if (TargetFlags & ARMII::MO_DLLIMPORT)
+ Name = "__imp_";
+ else if (TargetFlags & ARMII::MO_COFFSTUB)
+ Name = ".refptr.";
getNameWithPrefix(Name, GV);
- return OutContext.getOrCreateSymbol(Name);
+ MCSymbol *MCSym = OutContext.getOrCreateSymbol(Name);
+
+ if (TargetFlags & ARMII::MO_COFFSTUB) {
+ MachineModuleInfoCOFF &MMICOFF =
+ MMI->getObjFileInfo<MachineModuleInfoCOFF>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMICOFF.getGVStubEntry(MCSym);
+
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), true);
+ }
+
+ return MCSym;
} else if (Subtarget->isTargetELF()) {
return getSymbol(GV);
}
@@ -1043,10 +1071,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
const MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TargetRegInfo =
+ MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MachineRegInfo = MF.getRegInfo();
const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned FramePtr = TargetRegInfo->getFrameRegister(MF);
unsigned Opc = MI->getOpcode();
unsigned SrcReg, DstReg;
@@ -1103,7 +1133,9 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
if (MO.isUndef()) {
assert(RegList.empty() &&
"Pad registers must come before restored ones");
- Pad += 4;
+ unsigned Width =
+ TargetRegInfo->getRegSizeInBits(MO.getReg(), MachineRegInfo) / 8;
+ Pad += Width;
continue;
}
RegList.push_back(MO.getReg());
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index b1c2031c7d7b..bbebed59c851 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -708,8 +708,12 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return MCID.getSize();
// If this machine instr is an inline asm, measure it.
- if (MI.getOpcode() == ARM::INLINEASM)
- return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ if (MI.getOpcode() == ARM::INLINEASM) {
+ unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
+ Size = alignTo(Size, 4);
+ return Size;
+ }
unsigned Opc = MI.getOpcode();
switch (Opc) {
default:
@@ -935,9 +939,9 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Mov->addRegisterKilled(SrcReg, TRI);
}
-bool ARMBaseInstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
// VMOVRRD is also a copy instruction but it requires
// special way of handling. It is more complex copy version
// and since that we are not considering it. For recognition
@@ -971,8 +975,6 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -984,7 +986,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
switch (TRI->getSpillSize(*RC)) {
case 2:
if (ARM::HPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::VSTRH))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
@@ -995,14 +997,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::STRi12))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
@@ -1013,7 +1015,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
@@ -1021,7 +1023,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
if (Subtarget.hasV5TEOps()) {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
@@ -1029,7 +1031,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else {
// Fallback to STM instruction, which has existed since the dawn of
// time.
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
@@ -1043,14 +1045,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
- BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addMemOperand(MMO)
@@ -1063,14 +1065,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
+ get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
@@ -1086,14 +1089,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
- BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
+ get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
@@ -1107,7 +1111,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
@@ -1172,8 +1176,14 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
- const MachineMemOperand *Dummy;
- return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return true;
+ }
+ return false;
}
void ARMBaseInstrInfo::
@@ -1386,8 +1396,14 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
- const MachineMemOperand *Dummy;
- return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return true;
+ }
+ return false;
}
/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
@@ -1432,9 +1448,8 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
SmallVector<unsigned, 6> ScratchRegs;
for(unsigned I = 5; I < MI->getNumOperands(); ++I)
ScratchRegs.push_back(MI->getOperand(I).getReg());
- llvm::sort(ScratchRegs.begin(), ScratchRegs.end(),
- [&TRI](const unsigned &Reg1,
- const unsigned &Reg2) -> bool {
+ llvm::sort(ScratchRegs,
+ [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
return TRI.getEncodingValue(Reg1) <
TRI.getEncodingValue(Reg2);
});
@@ -1590,11 +1605,10 @@ void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
unsigned CPI = Orig.getOperand(1).getIndex();
unsigned PCLabelId = duplicateCPV(MF, CPI);
- MachineInstrBuilder MIB =
- BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
- .addConstantPoolIndex(CPI)
- .addImm(PCLabelId);
- MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
+ BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
+ .addConstantPoolIndex(CPI)
+ .addImm(PCLabelId)
+ .cloneMemRefs(Orig);
break;
}
}
@@ -2185,6 +2199,7 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::tSUBSi8, ARM::tSUBi8},
{ARM::tSUBSrr, ARM::tSUBrr},
{ARM::tSBCS, ARM::tSBC},
+ {ARM::tRSBS, ARM::tRSB},
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},
@@ -2949,6 +2964,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+ MI->clearRegisterDeads(ARM::CPSR);
+
return true;
}
@@ -4534,9 +4551,9 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
- .add(predOps(ARMCC::AL));
+ .addImm(0)
+ .cloneMemRefs(*MI)
+ .add(predOps(ARMCC::AL));
}
bool
@@ -5061,3 +5078,32 @@ bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
}
llvm_unreachable("Target dependent opcode missing");
}
+
+std::pair<unsigned, unsigned>
+ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ const unsigned Mask = ARMII::MO_OPTION_MASK;
+ return std::make_pair(TF & Mask, TF & ~Mask);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace ARMII;
+
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
+ return makeArrayRef(TargetFlags);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+ using namespace ARMII;
+
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_COFFSTUB, "arm-coffstub"},
+ {MO_GOT, "arm-got"},
+ {MO_SBREL, "arm-sbrel"},
+ {MO_DLLIMPORT, "arm-dllimport"},
+ {MO_SECREL, "arm-secrel"},
+ {MO_NONLAZY, "arm-nonlazy"}};
+ return makeArrayRef(TargetFlags);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index b54be15097b1..de1f307083ba 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -101,6 +101,12 @@ protected:
unsigned OpIdx1,
unsigned OpIdx2) const override;
+  /// If the specific machine instruction is an instruction that moves/copies
+  /// a value from one register to another, return true along with the
+  /// @Source and @Destination machine operands.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
@@ -201,9 +207,6 @@ public:
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
-
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -331,6 +334,13 @@ public:
/// Get the number of addresses by LDM or VLDM or zero for unknown.
unsigned getNumLDMAddresses(const MachineInstr &MI) const;
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const override;
+
private:
unsigned getInstBundleLength(const MachineInstr &MI) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 5342e6e2cd13..02b3daf3c6fd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -209,6 +209,11 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+bool ARMBaseRegisterInfo::
+isAsmClobberable(const MachineFunction &MF, unsigned PhysReg) const {
+ return !getReservedRegs(MF).test(PhysReg);
+}
+
const TargetRegisterClass *
ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &) const {
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index f755f66a0f3a..45d29ebc0bd3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -131,6 +131,8 @@ public:
CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
@@ -154,7 +156,6 @@ public:
void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
bool hasBasePointer(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
index 47f998b696f5..8e80c32bcf89 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -237,7 +237,7 @@ void ARMCallLowering::splitToValueTypes(
/// Lower the return value for the already existing \p Ret. This assumes that
/// \p MIRBuilder's insertion point is correct.
bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg,
+ const Value *Val, ArrayRef<unsigned> VRegs,
MachineInstrBuilder &Ret) const {
if (!Val)
// Nothing to do here.
@@ -251,16 +251,24 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
if (!isSupportedType(DL, TLI, Val->getType()))
return false;
- SmallVector<ArgInfo, 4> SplitVTs;
- SmallVector<unsigned, 4> Regs;
- ArgInfo RetInfo(VReg, Val->getType());
- setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
- splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) {
- Regs.push_back(Reg);
- });
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
- if (Regs.size() > 1)
- MIRBuilder.buildUnmerge(Regs, VReg);
+ SmallVector<ArgInfo, 4> SplitVTs;
+ LLVMContext &Ctx = Val->getType()->getContext();
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ ArgInfo CurArgInfo(VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx));
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+
+ SmallVector<unsigned, 4> Regs;
+ splitToValueTypes(
+ CurArgInfo, SplitVTs, MF,
+ [&](unsigned Reg, uint64_t Offset) { Regs.push_back(Reg); });
+ if (Regs.size() > 1)
+ MIRBuilder.buildUnmerge(Regs, VRegs[i]);
+ }
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
@@ -270,14 +278,15 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
}
bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
- assert(!Val == !VReg && "Return value without a vreg");
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
+ assert(!Val == VRegs.empty() && "Return value without a vreg");
auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>();
unsigned Opcode = ST.getReturnOpcode();
auto Ret = MIRBuilder.buildInstrNoInsert(Opcode).add(predOps(ARMCC::AL));
- if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
+ if (!lowerReturnVal(MIRBuilder, Val, VRegs, Ret))
return false;
MIRBuilder.insertInstr(Ret);
@@ -420,7 +429,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
auto &TLI = *getTLI<ARMTargetLowering>();
auto Subtarget = TLI.getSubtarget();
- if (Subtarget->isThumb())
+ if (Subtarget->isThumb1Only())
return false;
// Quick exit if there aren't any args
@@ -491,6 +500,22 @@ struct CallReturnHandler : public IncomingValueHandler {
MachineInstrBuilder MIB;
};
+// FIXME: This should move to the ARMSubtarget when it supports all the opcodes.
+unsigned getCallOpcode(const ARMSubtarget &STI, bool isDirect) {
+ if (isDirect)
+ return STI.isThumb() ? ARM::tBL : ARM::BL;
+
+ if (STI.isThumb())
+ return ARM::tBLXr;
+
+ if (STI.hasV5TOps())
+ return ARM::BLX;
+
+ if (STI.hasV4TOps())
+ return ARM::BX_CALL;
+
+ return ARM::BMOVPCRX_CALL;
+}
} // end anonymous namespace
bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
@@ -508,27 +533,34 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (STI.genLongCalls())
return false;
+ if (STI.isThumb1Only())
+ return false;
+
auto CallSeqStart = MIRBuilder.buildInstr(ARM::ADJCALLSTACKDOWN);
// Create the call instruction so we can add the implicit uses of arg
// registers, but don't insert it yet.
bool isDirect = !Callee.isReg();
- auto CallOpcode =
- isDirect ? ARM::BL
- : STI.hasV5TOps()
- ? ARM::BLX
- : STI.hasV4TOps() ? ARM::BX_CALL : ARM::BMOVPCRX_CALL;
- auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode)
- .add(Callee)
- .addRegMask(TRI->getCallPreservedMask(MF, CallConv));
- if (Callee.isReg()) {
+ auto CallOpcode = getCallOpcode(STI, isDirect);
+ auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode);
+
+ bool isThumb = STI.isThumb();
+ if (isThumb)
+ MIB.add(predOps(ARMCC::AL));
+
+ MIB.add(Callee);
+ if (!isDirect) {
auto CalleeReg = Callee.getReg();
- if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg))
- MIB->getOperand(0).setReg(constrainOperandRegClass(
+ if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) {
+ unsigned CalleeIdx = isThumb ? 2 : 0;
+ MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass(
MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(),
- *MIB.getInstr(), MIB->getDesc(), Callee, 0));
+ *MIB.getInstr(), MIB->getDesc(), Callee, CalleeIdx));
+ }
}
+ MIB.addRegMask(TRI->getCallPreservedMask(MF, CallConv));
+
SmallVector<ArgInfo, 8> ArgInfos;
for (auto Arg : OrigArgs) {
if (!isSupportedType(DL, TLI, Arg.Ty))
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
index 86854c53f179..45a988a2f00e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -33,8 +33,8 @@ class ARMCallLowering : public CallLowering {
public:
ARMCallLowering(const ARMTargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
@@ -45,7 +45,8 @@ public:
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
- unsigned VReg, MachineInstrBuilder &Ret) const;
+ ArrayRef<unsigned> VRegs,
+ MachineInstrBuilder &Ret) const;
using SplitArgTy = std::function<void(unsigned Reg, uint64_t Offset)>;
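
The call-lowering hunk above replaces the nested ternary with a getCallOpcode() helper so Thumb targets pick tBL/tBLXr and older ARM-mode subtargets fall back to BX_CALL or BMOVPCRX_CALL for indirect calls. The sketch below mirrors that priority order with stand-in types; the Subtarget struct and Opcode enum here are illustrative, not LLVM's.

// Minimal sketch of the call-opcode selection ladder introduced above.
#include <cassert>

enum class Opcode { tBL, BL, tBLXr, BLX, BX_CALL, BMOVPCRX_CALL };

struct Subtarget {
  bool Thumb = false;   // assume: corresponds to isThumb()
  bool HasV5T = false;  // assume: corresponds to hasV5TOps()
  bool HasV4T = false;  // assume: corresponds to hasV4TOps()
};

// Same priority order as getCallOpcode(): direct calls first, then Thumb,
// then progressively older ARM-mode fallbacks for indirect calls.
Opcode selectCallOpcode(const Subtarget &STI, bool IsDirect) {
  if (IsDirect)
    return STI.Thumb ? Opcode::tBL : Opcode::BL;
  if (STI.Thumb)
    return Opcode::tBLXr;
  if (STI.HasV5T)
    return Opcode::BLX;
  if (STI.HasV4T)
    return Opcode::BX_CALL;
  return Opcode::BMOVPCRX_CALL;
}

int main() {
  Subtarget ArmV4;                     // pre-v4T ARM core
  assert(selectCallOpcode(ArmV4, /*IsDirect=*/false) == Opcode::BMOVPCRX_CALL);
  Subtarget Thumb2{true, true, true};  // Thumb-capable, v5T+
  assert(selectCallOpcode(Thumb2, /*IsDirect=*/true) == Opcode::tBL);
  return 0;
}
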
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
index 24071277427a..b631c2bc687b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -54,47 +54,108 @@ EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false),
cl::desc("Use DSP instructions for scalar operations\
with immediate operands"));
-namespace {
+// The goal of this pass is to enable more efficient code generation for
+// operations on narrow types (i.e. types with < 32-bits) and this is a
+// motivating IR code example:
+//
+// define hidden i32 @cmp(i8 zeroext) {
+// %2 = add i8 %0, -49
+// %3 = icmp ult i8 %2, 3
+// ..
+// }
+//
+// The issue here is that i8 is type-legalized to i32 because i8 is not a
+// legal type. Thus, arithmetic is done in integer-precision, but then the
+// byte value is masked out as follows:
+//
+// t19: i32 = add t4, Constant:i32<-49>
+// t24: i32 = and t19, Constant:i32<255>
+//
+// Consequently, we generate code like this:
+//
+// subs r0, #49
+// uxtb r1, r0
+// cmp r1, #3
+//
+// This shows that masking out the byte value results in generation of
+// the UXTB instruction. This is not optimal as r0 already contains the byte
+// value we need, and so instead we can just generate:
+//
+// sub.w r1, r0, #49
+// cmp r1, #3
+//
+// We achieve this by type promoting the IR to i32 like so for this example:
+//
+// define i32 @cmp(i8 zeroext %c) {
+// %0 = zext i8 %c to i32
+// %c.off = add i32 %0, -49
+// %1 = icmp ult i32 %c.off, 3
+// ..
+// }
+//
+// For this to be valid and legal, we need to prove that the i32 add is
+// producing the same value as the i8 addition, and that e.g. no overflow
+// happens.
+//
+// A brief sketch of the algorithm and some terminology.
+// We pattern match interesting IR patterns, which:
+// - have "sources": instructions producing narrow values (i8, i16), and
+// - have "sinks": instructions consuming these narrow values.
+//
+// We collect all instructions connecting sources and sinks in a worklist, so
+// that we can mutate these instructions and perform type promotion when it is
+// legal to do so.
+namespace {
class IRPromoter {
SmallPtrSet<Value*, 8> NewInsts;
- SmallVector<Instruction*, 4> InstsToRemove;
+ SmallPtrSet<Instruction*, 4> InstsToRemove;
+ DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap;
+ SmallPtrSet<Value*, 8> Promoted;
Module *M = nullptr;
LLVMContext &Ctx;
+ IntegerType *ExtTy = nullptr;
+ IntegerType *OrigTy = nullptr;
+ SmallPtrSetImpl<Value*> *Visited;
+ SmallPtrSetImpl<Value*> *Sources;
+ SmallPtrSetImpl<Instruction*> *Sinks;
+ SmallPtrSetImpl<Instruction*> *SafeToPromote;
+
+ void ReplaceAllUsersOfWith(Value *From, Value *To);
+ void PrepareConstants(void);
+ void ExtendSources(void);
+ void ConvertTruncs(void);
+ void PromoteTree(void);
+ void TruncateSinks(void);
+ void Cleanup(void);
public:
- IRPromoter(Module *M) : M(M), Ctx(M->getContext()) { }
+ IRPromoter(Module *M) : M(M), Ctx(M->getContext()),
+ ExtTy(Type::getInt32Ty(Ctx)) { }
- void Cleanup() {
- for (auto *I : InstsToRemove) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
- I->dropAllReferences();
- I->eraseFromParent();
- }
- InstsToRemove.clear();
- NewInsts.clear();
- }
void Mutate(Type *OrigTy,
SmallPtrSetImpl<Value*> &Visited,
- SmallPtrSetImpl<Value*> &Leaves,
- SmallPtrSetImpl<Instruction*> &Roots);
+ SmallPtrSetImpl<Value*> &Sources,
+ SmallPtrSetImpl<Instruction*> &Sinks,
+ SmallPtrSetImpl<Instruction*> &SafeToPromote);
};
class ARMCodeGenPrepare : public FunctionPass {
const ARMSubtarget *ST = nullptr;
IRPromoter *Promoter = nullptr;
std::set<Value*> AllVisited;
- Type *OrigTy = nullptr;
- unsigned TypeSize = 0;
+ SmallPtrSet<Instruction*, 8> SafeToPromote;
- bool isNarrowInstSupported(Instruction *I);
+ bool isSafeOverflow(Instruction *I);
bool isSupportedValue(Value *V);
bool isLegalToPromote(Value *V);
bool TryToPromote(Value *V);
public:
static char ID;
+ static unsigned TypeSize;
+ Type *OrigTy = nullptr;
ARMCodeGenPrepare() : FunctionPass(ID) {}
@@ -111,8 +172,7 @@ public:
}
-/// Can the given value generate sign bits.
-static bool isSigned(Value *V) {
+static bool generateSignBits(Value *V) {
if (!isa<Instruction>(V))
return false;
@@ -121,120 +181,226 @@ static bool isSigned(Value *V) {
Opc == Instruction::SRem;
}
+static bool EqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
+}
+
+static bool LessOrEqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() <= ARMCodeGenPrepare::TypeSize;
+}
+
+static bool GreaterThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize;
+}
+
+static bool LessThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() < ARMCodeGenPrepare::TypeSize;
+}
+
/// Some instructions can use 8- and 16-bit operands, and we don't need to
/// promote anything larger. We disallow booleans to make life easier when
/// dealing with icmps but allow any other integer that is <= 16 bits. Void
/// types are accepted so we can handle switches.
static bool isSupportedType(Value *V) {
- if (V->getType()->isVoidTy())
+ Type *Ty = V->getType();
+
+ // Allow voids and pointers, these won't be promoted.
+ if (Ty->isVoidTy() || Ty->isPointerTy())
return true;
- const IntegerType *IntTy = dyn_cast<IntegerType>(V->getType());
- if (!IntTy)
- return false;
+ if (auto *Ld = dyn_cast<LoadInst>(V))
+ Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
- // Don't try to promote boolean values.
- if (IntTy->getBitWidth() == 1)
+ if (!isa<IntegerType>(Ty) ||
+ cast<IntegerType>(V->getType())->getBitWidth() == 1)
return false;
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- return isSupportedType(ZExt->getOperand(0));
+ return LessOrEqualTypeSize(V);
+}
- return IntTy->getBitWidth() <= 16;
+/// Return true if the given value is a source in the use-def chain, producing
+/// a narrow 'TypeSize' value. These values will be zext to start the promotion
+/// of the tree to i32. We guarantee that these won't populate the upper bits
+/// of the register. ZExt on the loads will be free, and the same for call
+/// return values because we only accept ones that guarantee a zeroext ret val.
+/// Many arguments will have the zeroext attribute too, so those would be free
+/// too.
+static bool isSource(Value *V) {
+ if (!isa<IntegerType>(V->getType()))
+ return false;
+
+ // TODO Allow zext to be sources.
+ if (isa<Argument>(V))
+ return true;
+ else if (isa<LoadInst>(V))
+ return true;
+ else if (isa<BitCastInst>(V))
+ return true;
+ else if (auto *Call = dyn_cast<CallInst>(V))
+ return Call->hasRetAttr(Attribute::AttrKind::ZExt);
+ else if (auto *Trunc = dyn_cast<TruncInst>(V))
+ return EqualTypeSize(Trunc);
+ return false;
}
/// Return true if V will require any promoted values to be truncated for the
-/// use to be valid.
+/// IR to remain valid. We can't mutate the value type of these
+/// instructions.
static bool isSink(Value *V) {
- auto UsesNarrowValue = [](Value *V) {
- return V->getType()->getScalarSizeInBits() <= 32;
- };
-
+ // TODO The truncate also isn't actually necessary because we have already
+ // proved that the data value is kept within the range of the original data
+ // type.
+
+ // Sinks are:
+ // - points where the value in the register is being observed, such as an
+ // icmp, switch or store.
+ // - points where value types have to match, such as calls and returns.
+ // - zext are included to ease the transformation and are generally removed
+ // later on.
if (auto *Store = dyn_cast<StoreInst>(V))
- return UsesNarrowValue(Store->getValueOperand());
+ return LessOrEqualTypeSize(Store->getValueOperand());
if (auto *Return = dyn_cast<ReturnInst>(V))
- return UsesNarrowValue(Return->getReturnValue());
+ return LessOrEqualTypeSize(Return->getReturnValue());
+ if (auto *ZExt = dyn_cast<ZExtInst>(V))
+ return GreaterThanTypeSize(ZExt);
+ if (auto *Switch = dyn_cast<SwitchInst>(V))
+ return LessThanTypeSize(Switch->getCondition());
+ if (auto *ICmp = dyn_cast<ICmpInst>(V))
+ return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
return isa<CallInst>(V);
}
-/// Return true if the given value is a leaf that will need to be zext'd.
-static bool isSource(Value *V) {
- if (isa<Argument>(V) && isSupportedType(V))
- return true;
- else if (isa<TruncInst>(V))
- return true;
- else if (auto *ZExt = dyn_cast<ZExtInst>(V))
- // ZExt can be a leaf if its the only user of a load.
- return isa<LoadInst>(ZExt->getOperand(0)) &&
- ZExt->getOperand(0)->hasOneUse();
- else if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Load = dyn_cast<LoadInst>(V)) {
- if (!isa<IntegerType>(Load->getType()))
- return false;
- // A load is a leaf, unless its already just being zext'd.
- if (Load->hasOneUse() && isa<ZExtInst>(*Load->use_begin()))
- return false;
-
- return true;
- }
- return false;
-}
-
/// Return whether the instruction can be promoted within any modifications to
-/// it's operands or result.
-static bool isSafeOverflow(Instruction *I) {
+/// its operands or result.
+bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
+ // FIXME Do we need NSW too?
if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
return true;
+ // We can support a, potentially, overflowing instruction (I) if:
+ // - It is only used by an unsigned icmp.
+ // - The icmp uses a constant.
+ // - The overflowing value (I) is decreasing, i.e would underflow - wrapping
+ // around zero to become a larger number than before.
+ // - The underflowing instruction (I) also uses a constant.
+ //
+ // We can then use the two constants to calculate whether the result would
+ // wrap with respect to itself in the original bitwidth. If it doesn't wrap,
+ // but just underflows the range, the icmp would give the same result whether
+ // the result has been truncated or not. We calculate this by:
+ // - Zero extending both constants, if needed, to 32-bits.
+ // - Take the absolute value of I's constant, adding this to the icmp const.
+ // - Check that this value is not out of range for the small type. If it is, it
+ // means that it has underflowed enough to wrap around the icmp constant.
+ //
+ // For example:
+ //
+ // %sub = sub i8 %a, 2
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -2 == FE == 254
+ // But if this is evaluated as an i32
+ // %sub = -2 == FF FF FF FE == 4294967294
+ // So the unsigned compares (i8 and i32) would not yield the same result.
+ //
+ // Another way to look at it is:
+ // %a - 2 <= 254
+ // %a + 2 <= 254 + 2
+ // %a <= 256
+ // And we can't represent 256 in the i8 format, so we don't support it.
+ //
+ // Whereas:
+ //
+ // %sub i8 %a, 1
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -1 == FF == 255
+ // As i32:
+ // %sub = -1 == FF FF FF FF == 4294967295
+ //
+ // In this case, the unsigned compare results would be the same and this
+ // would also be true for ult, uge and ugt:
+ // - (255 < 254) == (0xFFFFFFFF < 254) == false
+ // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
+ // - (255 > 254) == (0xFFFFFFFF > 254) == true
+ // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
+ //
+ // To demonstrate why we can't handle increasing values:
+ //
+ // %add = add i8 %a, 2
+ // %cmp = icmp ult i8 %add, 127
+ //
+ // If %a = 254, %add = 256 == (i8 1)
+ // As i32:
+ // %add = 256
+ //
+ // (1 < 127) != (256 < 127)
+
unsigned Opc = I->getOpcode();
- if (Opc == Instruction::Add || Opc == Instruction::Sub) {
- // We don't care if the add or sub could wrap if the value is decreasing
- // and is only being used by an unsigned compare.
- if (!I->hasOneUse() ||
- !isa<ICmpInst>(*I->user_begin()) ||
- !isa<ConstantInt>(I->getOperand(1)))
- return false;
+ if (Opc != Instruction::Add && Opc != Instruction::Sub)
+ return false;
- auto *CI = cast<ICmpInst>(*I->user_begin());
- if (CI->isSigned())
- return false;
+ if (!I->hasOneUse() ||
+ !isa<ICmpInst>(*I->user_begin()) ||
+ !isa<ConstantInt>(I->getOperand(1)))
+ return false;
- bool NegImm = cast<ConstantInt>(I->getOperand(1))->isNegative();
- bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
- ((Opc == Instruction::Add) && NegImm);
- if (!IsDecreasing)
- return false;
+ ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
+ bool NegImm = OverflowConst->isNegative();
+ bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
+ ((Opc == Instruction::Add) && NegImm);
+ if (!IsDecreasing)
+ return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
- return true;
- }
+ // Don't support an icmp that deals with sign bits.
+ auto *CI = cast<ICmpInst>(*I->user_begin());
+ if (CI->isSigned() || CI->isEquality())
+ return false;
- // Otherwise, if an instruction is using a negative immediate we will need
- // to fix it up during the promotion.
- for (auto &Op : I->operands()) {
- if (auto *Const = dyn_cast<ConstantInt>(Op))
- if (Const->isNegative())
- return false;
- }
- return false;
+ ConstantInt *ICmpConst = nullptr;
+ if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
+ ICmpConst = Const;
+ else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
+ ICmpConst = Const;
+ else
+ return false;
+
+ // Now check that the result can't wrap on itself.
+ APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
+ ICmpConst->getValue().zext(32) : ICmpConst->getValue();
+
+ Total += OverflowConst->getValue().getBitWidth() < 32 ?
+ OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
+
+ APInt Max = APInt::getAllOnesValue(ARMCodeGenPrepare::TypeSize);
+
+ if (Total.getBitWidth() > Max.getBitWidth()) {
+ if (Total.ugt(Max.zext(Total.getBitWidth())))
+ return false;
+ } else if (Max.getBitWidth() > Total.getBitWidth()) {
+ if (Total.zext(Max.getBitWidth()).ugt(Max))
+ return false;
+ } else if (Total.ugt(Max))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
+ return true;
}
static bool shouldPromote(Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!isa<IntegerType>(V->getType()) || isSink(V))
return false;
- if (!isa<IntegerType>(V->getType()))
- return false;
+ if (isSource(V))
+ return true;
- if (isa<StoreInst>(I) || isa<TerminatorInst>(I) || isa<TruncInst>(I) ||
- isa<ICmpInst>(I))
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
return false;
- if (auto *ZExt = dyn_cast<ZExtInst>(I))
- return !ZExt->getDestTy()->isIntegerTy(32);
+ if (isa<ICmpInst>(I))
+ return false;
return true;
}
@@ -245,24 +411,16 @@ static bool isPromotedResultSafe(Value *V) {
if (!isa<Instruction>(V))
return true;
- if (isSigned(V))
+ if (generateSignBits(V))
return false;
- // If I is only being used by something that will require its value to be
- // truncated, then we don't care about the promoted result.
- auto *I = cast<Instruction>(V);
- if (I->hasOneUse() && isSink(*I->use_begin()))
- return true;
-
- if (isa<OverflowingBinaryOperator>(I))
- return isSafeOverflow(I);
- return true;
+ return !isa<OverflowingBinaryOperator>(V);
}
/// Return the intrinsic for the instruction that can perform the same
/// operation but on a narrow type. This is using the parallel dsp intrinsics
/// on scalar values.
-static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
+static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
// Whether we use the signed or unsigned versions of these intrinsics
// doesn't matter because we're not using the GE bits that they set in
// the APSR.
@@ -270,124 +428,163 @@ static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
default:
break;
case Instruction::Add:
- return TypeSize == 16 ? Intrinsic::arm_uadd16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 :
Intrinsic::arm_uadd8;
case Instruction::Sub:
- return TypeSize == 16 ? Intrinsic::arm_usub16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_usub16 :
Intrinsic::arm_usub8;
}
llvm_unreachable("unhandled opcode for narrow intrinsic");
}
-void IRPromoter::Mutate(Type *OrigTy,
- SmallPtrSetImpl<Value*> &Visited,
- SmallPtrSetImpl<Value*> &Leaves,
- SmallPtrSetImpl<Instruction*> &Roots) {
+void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
+ SmallVector<Instruction*, 4> Users;
+ Instruction *InstTo = dyn_cast<Instruction>(To);
+ bool ReplacedAll = true;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Replacing " << *From << " with " << *To
+ << "\n");
+
+ for (Use &U : From->uses()) {
+ auto *User = cast<Instruction>(U.getUser());
+ if (InstTo && User->isIdenticalTo(InstTo)) {
+ ReplacedAll = false;
+ continue;
+ }
+ Users.push_back(User);
+ }
+
+ for (auto *U : Users)
+ U->replaceUsesOfWith(From, To);
+
+ if (ReplacedAll)
+ if (auto *I = dyn_cast<Instruction>(From))
+ InstsToRemove.insert(I);
+}
+
+void IRPromoter::PrepareConstants() {
IRBuilder<> Builder{Ctx};
- Type *ExtTy = Type::getInt32Ty(M->getContext());
- unsigned TypeSize = OrigTy->getPrimitiveSizeInBits();
- SmallPtrSet<Value*, 8> Promoted;
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " << TypeSize
- << " to 32-bits\n");
-
- auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) {
- SmallVector<Instruction*, 4> Users;
- Instruction *InstTo = dyn_cast<Instruction>(To);
- for (Use &U : From->uses()) {
- auto *User = cast<Instruction>(U.getUser());
- if (InstTo && User->isIdenticalTo(InstTo))
+ // First step is to prepare the instructions for mutation. Most constants
+ // just need to be zero extended into their new type, but complications arise
+ // because:
+ // - For nuw binary operators, negative immediates would need sign extending;
+ // however, instead we'll change them to positive and zext them. We can do
+ // this because:
+ // > The operators that can wrap are: add, sub, mul and shl.
+ // > shl interprets its second operand as unsigned and if the first operand
+ // is an immediate, it will need zext to be nuw.
+ // > I'm assuming mul has to interpret immediates as unsigned for nuw.
+ // > Which leaves the nuw add and sub to be handled; as with shl, if an
+ // immediate is used as operand 0, it will need zext to be nuw.
+ // - We also allow add and sub to safely overflow in certain circumstances
+ // and only when the value (operand 0) is being decreased.
+ //
+ // For adds and subs, that are either nuw or safely wrap and use a negative
+ // immediate as operand 1, we create an equivalent instruction using a
+ // positive immediate. That positive immediate can then be zext along with
+ // all the other immediates later.
+ for (auto *V : *Visited) {
+ if (!isa<Instruction>(V))
+ continue;
+
+ auto *I = cast<Instruction>(V);
+ if (SafeToPromote->count(I)) {
+
+ if (!isa<OverflowingBinaryOperator>(I))
continue;
- Users.push_back(User);
- }
- for (auto &U : Users)
- U->replaceUsesOfWith(From, To);
- };
+ if (auto *Const = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!Const->isNegative())
+ break;
- auto FixConst = [&](ConstantInt *Const, Instruction *I) {
- Constant *NewConst = nullptr;
- if (isSafeOverflow(I)) {
- NewConst = (Const->isNegative()) ?
- ConstantExpr::getSExt(Const, ExtTy) :
- ConstantExpr::getZExt(Const, ExtTy);
- } else {
- uint64_t NewVal = *Const->getValue().getRawData();
- if (Const->getType() == Type::getInt16Ty(Ctx))
- NewVal &= 0xFFFF;
- else
- NewVal &= 0xFF;
- NewConst = ConstantInt::get(ExtTy, NewVal);
+ unsigned Opc = I->getOpcode();
+ if (Opc != Instruction::Add && Opc != Instruction::Sub)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
+ auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
+ Builder.SetInsertPoint(I);
+ Value *NewVal = Opc == Instruction::Sub ?
+ Builder.CreateAdd(I->getOperand(0), NewConst) :
+ Builder.CreateSub(I->getOperand(0), NewConst);
+ LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
+
+ if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
+ NewInst->copyIRFlags(I);
+ NewInsts.insert(NewInst);
+ }
+ InstsToRemove.insert(I);
+ I->replaceAllUsesWith(NewVal);
+ }
}
- I->replaceUsesOfWith(Const, NewConst);
- };
+ }
+ for (auto *I : NewInsts)
+ Visited->insert(I);
+}
- auto InsertDSPIntrinsic = [&](Instruction *I) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
- << *I << "\n");
- Function *DSPInst =
- Intrinsic::getDeclaration(M, getNarrowIntrinsic(I, TypeSize));
- Builder.SetInsertPoint(I);
- Builder.SetCurrentDebugLocation(I->getDebugLoc());
- Value *Args[] = { I->getOperand(0), I->getOperand(1) };
- CallInst *Call = Builder.CreateCall(DSPInst, Args);
- ReplaceAllUsersOfWith(I, Call);
- InstsToRemove.push_back(I);
- NewInsts.insert(Call);
- };
+void IRPromoter::ExtendSources() {
+ IRBuilder<> Builder{Ctx};
auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
+ assert(V->getType() != ExtTy && "zext already extends to i32");
LLVM_DEBUG(dbgs() << "ARM CGP: Inserting ZExt for " << *V << "\n");
Builder.SetInsertPoint(InsertPt);
if (auto *I = dyn_cast<Instruction>(V))
Builder.SetCurrentDebugLocation(I->getDebugLoc());
- auto *ZExt = cast<Instruction>(Builder.CreateZExt(V, ExtTy));
- if (isa<Argument>(V))
- ZExt->moveBefore(InsertPt);
- else
- ZExt->moveAfter(InsertPt);
+
+ Value *ZExt = Builder.CreateZExt(V, ExtTy);
+ if (auto *I = dyn_cast<Instruction>(ZExt)) {
+ if (isa<Argument>(V))
+ I->moveBefore(InsertPt);
+ else
+ I->moveAfter(InsertPt);
+ NewInsts.insert(I);
+ }
+
ReplaceAllUsersOfWith(V, ZExt);
- NewInsts.insert(ZExt);
};
- // First, insert extending instructions between the leaves and their users.
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting leaves:\n");
- for (auto V : Leaves) {
+ // Now, insert extending instructions between the sources and their users.
+ LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n");
+ for (auto V : *Sources) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- ZExt->mutateType(ExtTy);
- else if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
else if (auto *Arg = dyn_cast<Argument>(V)) {
BasicBlock &BB = Arg->getParent()->front();
InsertZExt(Arg, &*BB.getFirstInsertionPt());
} else {
- llvm_unreachable("unhandled leaf that needs extending");
+ llvm_unreachable("unhandled source that needs extending");
}
Promoted.insert(V);
}
+}
+void IRPromoter::PromoteTree() {
LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n");
- // Then mutate the types of the instructions within the tree. Here we handle
- // constant operands.
- for (auto *V : Visited) {
- if (Leaves.count(V))
- continue;
- if (!isa<Instruction>(V))
+ IRBuilder<> Builder{Ctx};
+
+ // Mutate the types of the instructions within the tree. Here we handle
+ // constant operands.
+ for (auto *V : *Visited) {
+ if (Sources->count(V))
continue;
auto *I = cast<Instruction>(V);
- if (Roots.count(I))
+ if (Sinks->count(I))
continue;
- for (auto &U : I->operands()) {
- if ((U->getType() == ExtTy) || !isSupportedType(&*U))
+ for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
+ Value *Op = I->getOperand(i);
+ if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
continue;
- if (auto *Const = dyn_cast<ConstantInt>(&*U))
- FixConst(Const, I);
- else if (isa<UndefValue>(&*U))
- U->mutateType(ExtTy);
+ if (auto *Const = dyn_cast<ConstantInt>(Op)) {
+ Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy);
+ I->setOperand(i, NewConst);
+ } else if (isa<UndefValue>(Op))
+ I->setOperand(i, UndefValue::get(ExtTy));
}
if (shouldPromote(I)) {
@@ -396,91 +593,215 @@ void IRPromoter::Mutate(Type *OrigTy,
}
}
- // Now we need to remove any zexts that have become unnecessary, as well
- // as insert any intrinsics.
- for (auto *V : Visited) {
- if (Leaves.count(V))
+ // Finally, any instructions that should be promoted but haven't yet been,
+ // need to be handled using intrinsics.
+ for (auto *V : *Visited) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
continue;
- if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- if (ZExt->getDestTy() != ExtTy) {
- ZExt->mutateType(ExtTy);
- Promoted.insert(ZExt);
- }
- else if (ZExt->getSrcTy() == ExtTy) {
- ReplaceAllUsersOfWith(V, ZExt->getOperand(0));
- InstsToRemove.push_back(ZExt);
- }
+
+ if (Sources->count(I) || Sinks->count(I))
continue;
- }
- if (!shouldPromote(V) || isPromotedResultSafe(V))
+ if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I))
continue;
+
+ assert(EnableDSP && "DSP intrinsic insertion not enabled!");
// Replace unsafe instructions with appropriate intrinsic calls.
- InsertDSPIntrinsic(cast<Instruction>(V));
+ LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
+ << *I << "\n");
+ Function *DSPInst =
+ Intrinsic::getDeclaration(M, getNarrowIntrinsic(I));
+ Builder.SetInsertPoint(I);
+ Builder.SetCurrentDebugLocation(I->getDebugLoc());
+ Value *Args[] = { I->getOperand(0), I->getOperand(1) };
+ CallInst *Call = Builder.CreateCall(DSPInst, Args);
+ NewInsts.insert(Call);
+ ReplaceAllUsersOfWith(I, Call);
}
+}
+
+void IRPromoter::TruncateSinks() {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n");
+
+ IRBuilder<> Builder{Ctx};
+
+ auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* {
+ if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
+ return nullptr;
+
+ if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources->count(V))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for "
+ << *V << "\n");
+ Builder.SetInsertPoint(cast<Instruction>(V));
+ auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
+ if (Trunc)
+ NewInsts.insert(Trunc);
+ return Trunc;
+ };
- LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the roots:\n");
// Fix up any stores or returns that use the results of the promoted
// chain.
- for (auto I : Roots) {
- LLVM_DEBUG(dbgs() << " - " << *I << "\n");
- Type *TruncTy = OrigTy;
- if (auto *Store = dyn_cast<StoreInst>(I)) {
- auto *PtrTy = cast<PointerType>(Store->getPointerOperandType());
- TruncTy = PtrTy->getElementType();
- } else if (isa<ReturnInst>(I)) {
- Function *F = I->getParent()->getParent();
- TruncTy = F->getFunctionType()->getReturnType();
+ for (auto I : *Sinks) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: For Sink: " << *I << "\n");
+
+ // Handle calls separately as we need to iterate over arg operands.
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ Value *Arg = Call->getArgOperand(i);
+ Type *Ty = TruncTysMap[Call][i];
+ if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
+ Trunc->moveBefore(Call);
+ Call->setArgOperand(i, Trunc);
+ }
+ }
+ continue;
}
+ // Special case switches because we need to truncate the condition.
+ if (auto *Switch = dyn_cast<SwitchInst>(I)) {
+ Type *Ty = TruncTysMap[Switch][0];
+ if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
+ Trunc->moveBefore(Switch);
+ Switch->setCondition(Trunc);
+ }
+ continue;
+ }
+
+ // Now handle the others.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- Value *V = I->getOperand(i);
- if (Promoted.count(V) || NewInsts.count(V)) {
- if (auto *Op = dyn_cast<Instruction>(V)) {
-
- if (auto *Call = dyn_cast<CallInst>(I))
- TruncTy = Call->getFunctionType()->getParamType(i);
-
- if (TruncTy == ExtTy)
- continue;
-
- LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy
- << " Trunc for " << *Op << "\n");
- Builder.SetInsertPoint(Op);
- auto *Trunc = cast<Instruction>(Builder.CreateTrunc(Op, TruncTy));
- Trunc->moveBefore(I);
- I->setOperand(i, Trunc);
- NewInsts.insert(Trunc);
- }
+ Type *Ty = TruncTysMap[I][i];
+ if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
+ Trunc->moveBefore(I);
+ I->setOperand(i, Trunc);
}
}
}
- LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete.\n");
}
-bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
- if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
- return false;
+void IRPromoter::Cleanup() {
+ // Some zexts will now have become redundant, along with their trunc
+ // operands, so remove them
+ for (auto V : *Visited) {
+ if (!isa<CastInst>(V))
+ continue;
- if (ST->isThumb() && !ST->hasThumb2())
- return false;
+ auto ZExt = cast<CastInst>(V);
+ if (ZExt->getDestTy() != ExtTy)
+ continue;
- if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
- return false;
+ Value *Src = ZExt->getOperand(0);
+ if (ZExt->getSrcTy() == ZExt->getDestTy()) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast: " << *ZExt
+ << "\n");
+ ReplaceAllUsersOfWith(ZExt, Src);
+ continue;
+ }
- // TODO
- // Would it be profitable? For Thumb code, these parallel DSP instructions
- // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
- // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
- // halved. They also do not take immediates as operands.
- for (auto &Op : I->operands()) {
- if (isa<Constant>(Op)) {
- if (!EnableDSPWithImms)
- return false;
+ // For any truncs that we insert to handle zexts, we can replace the
+ // result of the zext with the input to the trunc.
+ if (NewInsts.count(Src) && isa<ZExtInst>(V) && isa<TruncInst>(Src)) {
+ auto *Trunc = cast<TruncInst>(Src);
+ assert(Trunc->getOperand(0)->getType() == ExtTy &&
+ "expected inserted trunc to be operating on i32");
+ ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
}
}
- return true;
+
+ for (auto *I : InstsToRemove) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
+ I->dropAllReferences();
+ I->eraseFromParent();
+ }
+
+ InstsToRemove.clear();
+ NewInsts.clear();
+ TruncTysMap.clear();
+ Promoted.clear();
+}
+
+void IRPromoter::ConvertTruncs() {
+ IRBuilder<> Builder{Ctx};
+
+ for (auto *V : *Visited) {
+ if (!isa<TruncInst>(V) || Sources->count(V))
+ continue;
+
+ auto *Trunc = cast<TruncInst>(V);
+ assert(LessThanTypeSize(Trunc) && "expected narrow trunc");
+
+ Builder.SetInsertPoint(Trunc);
+ unsigned NumBits =
+ cast<IntegerType>(Trunc->getType())->getScalarSizeInBits();
+ ConstantInt *Mask = ConstantInt::get(Ctx, APInt::getMaxValue(NumBits));
+ Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
+
+ if (auto *I = dyn_cast<Instruction>(Masked))
+ NewInsts.insert(I);
+
+ ReplaceAllUsersOfWith(Trunc, Masked);
+ }
+}
+
+void IRPromoter::Mutate(Type *OrigTy,
+ SmallPtrSetImpl<Value*> &Visited,
+ SmallPtrSetImpl<Value*> &Sources,
+ SmallPtrSetImpl<Instruction*> &Sinks,
+ SmallPtrSetImpl<Instruction*> &SafeToPromote) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains from "
+ << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
+
+ assert(isa<IntegerType>(OrigTy) && "expected integer type");
+ this->OrigTy = cast<IntegerType>(OrigTy);
+ assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() &&
+ "original type not smaller than extended type");
+
+ this->Visited = &Visited;
+ this->Sources = &Sources;
+ this->Sinks = &Sinks;
+ this->SafeToPromote = &SafeToPromote;
+
+ // Cache original types of the values that will likely need truncating
+ for (auto *I : Sinks) {
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ Value *Arg = Call->getArgOperand(i);
+ TruncTysMap[Call].push_back(Arg->getType());
+ }
+ } else if (auto *Switch = dyn_cast<SwitchInst>(I))
+ TruncTysMap[I].push_back(Switch->getCondition()->getType());
+ else {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ TruncTysMap[I].push_back(I->getOperand(i)->getType());
+ }
+ }
+
+ // Convert adds and subs using negative immediates to equivalent instructions
+ // that use positive constants.
+ PrepareConstants();
+
+ // Insert zext instructions between sources and their users.
+ ExtendSources();
+
+ // Convert any truncs, that aren't sources, into AND masks.
+ ConvertTruncs();
+
+ // Promote visited instructions, mutating their types in place. Also insert
+ // DSP intrinsics, if enabled, for adds and subs which would be unsafe to
+ // promote.
+ PromoteTree();
+
+ // Insert trunc instructions for use by calls, stores etc...
+ TruncateSinks();
+
+ // Finally, remove unnecessary zexts and truncs, delete old instructions and
+ // clear the data structures.
+ Cleanup();
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n");
}
/// We accept most instructions, as well as Arguments and ConstantInsts. We
@@ -488,102 +809,133 @@ bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
/// return value is zeroext. We don't allow opcodes that can introduce sign
/// bits.
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Is " << *V << " supported?\n");
-
- // Non-instruction values that we can handle.
- if (isa<ConstantInt>(V) || isa<Argument>(V))
- return true;
+ if (auto *I = dyn_cast<ICmpInst>(V)) {
+ // Now that we allow smaller types than TypeSize, only allow icmp of
+ // TypeSize because they will require a trunc to be legalised.
+ // TODO: Allow icmp of smaller types, and calculate at the end
+ // whether the transform would be beneficial.
+ if (isa<PointerType>(I->getOperand(0)->getType()))
+ return true;
+ return EqualTypeSize(I->getOperand(0));
+ }
// Memory instructions
- if (isa<StoreInst>(V) || isa<LoadInst>(V) || isa<GetElementPtrInst>(V))
+ if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
return true;
// Branches and targets.
- if (auto *ICmp = dyn_cast<ICmpInst>(V))
- return ICmp->isEquality() || !ICmp->isSigned();
-
if( isa<BranchInst>(V) || isa<SwitchInst>(V) || isa<BasicBlock>(V))
return true;
- if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V))
- return true;
+ // Non-instruction values that we can handle.
+ if ((isa<Constant>(V) && !isa<ConstantExpr>(V)) || isa<Argument>(V))
+ return isSupportedType(V);
+
+ if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V) ||
+ isa<LoadInst>(V))
+ return isSupportedType(V);
+
+ if (isa<SExtInst>(V))
+ return false;
+
+ if (auto *Cast = dyn_cast<CastInst>(V))
+ return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0));
// Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they can
- // still be roots.
+ // still be sinks.
if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Cast = dyn_cast<CastInst>(V)) {
- if (isa<ZExtInst>(Cast))
- return Cast->getDestTy()->getScalarSizeInBits() <= 32;
- else if (auto *Trunc = dyn_cast<TruncInst>(V))
- return Trunc->getDestTy()->getScalarSizeInBits() <= TypeSize;
- else {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, unsupported cast.\n");
- return false;
- }
- } else if (!isa<BinaryOperator>(V)) {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, not a binary operator.\n");
+ return isSupportedType(Call) &&
+ Call->hasRetAttr(Attribute::AttrKind::ZExt);
+
+ if (!isa<BinaryOperator>(V))
+ return false;
+
+ if (!isSupportedType(V))
return false;
- }
- bool res = !isSigned(V);
- if (!res)
- LLVM_DEBUG(dbgs() << "ARM CGP: No, it's a signed instruction.\n");
- return res;
+ if (generateSignBits(V)) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
+ return false;
+ }
+ return true;
}
/// Check that the type of V would be promoted and that the original type is
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
- if (!isSupportedType(V))
- return false;
- unsigned VSize = 0;
- if (auto *Ld = dyn_cast<LoadInst>(V)) {
- auto *PtrTy = cast<PointerType>(Ld->getPointerOperandType());
- VSize = PtrTy->getElementType()->getPrimitiveSizeInBits();
- } else if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- VSize = ZExt->getOperand(0)->getType()->getPrimitiveSizeInBits();
- } else {
- VSize = V->getType()->getPrimitiveSizeInBits();
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return true;
+
+ if (SafeToPromote.count(I))
+ return true;
+
+ if (isPromotedResultSafe(V) || isSafeOverflow(I)) {
+ SafeToPromote.insert(I);
+ return true;
}
- if (VSize > TypeSize)
+ if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
return false;
- if (isPromotedResultSafe(V))
- return true;
+ // If promotion is not safe, can we use a DSP instruction to natively
+ // handle the narrow type?
+ if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
+ return false;
- if (auto *I = dyn_cast<Instruction>(V))
- return isNarrowInstSupported(I);
+ if (ST->isThumb() && !ST->hasThumb2())
+ return false;
- return false;
+ // TODO
+ // Would it be profitable? For Thumb code, these parallel DSP instructions
+ // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
+ // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
+ // halved. They also do not take immediates as operands.
+ for (auto &Op : I->operands()) {
+ if (isa<Constant>(Op)) {
+ if (!EnableDSPWithImms)
+ return false;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "ARM CGP: Will use an intrinsic for: " << *I << "\n");
+ return true;
}
bool ARMCodeGenPrepare::TryToPromote(Value *V) {
OrigTy = V->getType();
TypeSize = OrigTy->getPrimitiveSizeInBits();
+ if (TypeSize > 16 || TypeSize < 8)
+ return false;
+
+ SafeToPromote.clear();
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
+ << TypeSize << "\n");
SetVector<Value*> WorkList;
- SmallPtrSet<Value*, 8> Leaves;
- SmallPtrSet<Instruction*, 4> Roots;
- WorkList.insert(V);
+ SmallPtrSet<Value*, 8> Sources;
+ SmallPtrSet<Instruction*, 4> Sinks;
SmallPtrSet<Value*, 16> CurrentVisited;
- CurrentVisited.clear();
+ WorkList.insert(V);
- // Return true if the given value can, or has been, visited. Add V to the
- // worklist if needed.
+ // Return true if V was added to the worklist as a supported instruction,
+ // if it was already visited, or if we don't need to explore it (e.g.
+ // pointer values and GEPs), and false otherwise.
auto AddLegalInst = [&](Value *V) {
if (CurrentVisited.count(V))
return true;
+ // Ignore GEPs because they don't need promoting and the constant indices
+ // will prevent the transformation.
+ if (isa<GetElementPtrInst>(V))
+ return true;
+
if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
return false;
@@ -600,6 +952,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (CurrentVisited.count(V))
continue;
+ // Ignore non-instructions, other than arguments.
if (!isa<Instruction>(V) && !isSource(V))
continue;
@@ -607,24 +960,26 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
// the tree has already been explored.
// TODO: This could limit the transform, ie if we try to promote something
// from an i8 and fail first, before trying an i16.
- if (AllVisited.count(V)) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Already visited this: " << *V << "\n");
+ if (AllVisited.count(V))
return false;
- }
CurrentVisited.insert(V);
AllVisited.insert(V);
// Calls can be both sources and sinks.
if (isSink(V))
- Roots.insert(cast<Instruction>(V));
+ Sinks.insert(cast<Instruction>(V));
+
if (isSource(V))
- Leaves.insert(V);
- else if (auto *I = dyn_cast<Instruction>(V)) {
- // Visit operands of any instruction visited.
- for (auto &U : I->operands()) {
- if (!AddLegalInst(U))
- return false;
+ Sources.insert(V);
+
+ if (!isSink(V) && !isSource(V)) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // Visit operands of any instruction visited.
+ for (auto &U : I->operands()) {
+ if (!AddLegalInst(U))
+ return false;
+ }
}
}
@@ -638,43 +993,23 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
}
}
- unsigned NumToPromote = 0;
- unsigned Cost = 0;
+ LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
+ for (auto *I : CurrentVisited)
+ I->dump();
+ );
+ unsigned ToPromote = 0;
for (auto *V : CurrentVisited) {
- // Truncs will cause a uxt and no zeroext arguments will often require
- // a uxt somewhere.
- if (isa<TruncInst>(V))
- ++Cost;
- else if (auto *Arg = dyn_cast<Argument>(V)) {
- if (!Arg->hasZExtAttr())
- ++Cost;
- }
-
- // Mem ops can automatically be extended/truncated and non-instructions
- // don't need anything done.
- if (Leaves.count(V) || isa<StoreInst>(V) || !isa<Instruction>(V))
+ if (Sources.count(V))
continue;
-
- // Will need to truncate calls args and returns.
- if (Roots.count(cast<Instruction>(V))) {
- ++Cost;
+ if (Sinks.count(cast<Instruction>(V)))
continue;
- }
-
- if (shouldPromote(V))
- ++NumToPromote;
+ ++ToPromote;
}
- LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
- for (auto *I : CurrentVisited)
- I->dump();
- );
- LLVM_DEBUG(dbgs() << "ARM CGP: Cost of promoting " << NumToPromote
- << " instructions = " << Cost << "\n");
- if (Cost > NumToPromote || (NumToPromote == 0))
+ if (ToPromote < 2)
return false;
- Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots);
+ Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote);
return true;
}
@@ -711,19 +1046,15 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) {
continue;
LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
+
for (auto &Op : CI.operands()) {
- if (auto *I = dyn_cast<Instruction>(Op)) {
- if (isa<ZExtInst>(I))
- MadeChange |= TryToPromote(I->getOperand(0));
- else
- MadeChange |= TryToPromote(I);
- }
+ if (auto *I = dyn_cast<Instruction>(Op))
+ MadeChange |= TryToPromote(I);
}
}
}
- Promoter->Cleanup();
LLVM_DEBUG(if (verifyFunction(F, &dbgs())) {
- dbgs();
+ dbgs() << F;
report_fatal_error("Broken function after type promotion");
});
}
@@ -744,6 +1075,7 @@ INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations",
false, false)
char ARMCodeGenPrepare::ID = 0;
+unsigned ARMCodeGenPrepare::TypeSize = 0;
FunctionPass *llvm::createARMCodeGenPreparePass() {
return new ARMCodeGenPrepare();
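
The isSafeOverflow() comments above walk through the arithmetic that decides whether a decreasing add/sub feeding an unsigned icmp can be promoted to i32 without changing the compare result. The following standalone model of that check (the name and signature are illustrative only, not the pass's API) reproduces the two worked i8 examples from the comment:

// Standalone model of the wrap check described in isSafeOverflow().
#include <cassert>
#include <cstdint>
#include <cstdlib>

// TypeSize is the narrow width being promoted (8 or 16). OverflowImm is the
// immediate of the decreasing add/sub, ICmpImm the unsigned icmp constant.
// Promotion is only safe if the "shifted" comparison constant still fits in
// the narrow type.
bool safeUnsignedUnderflow(unsigned TypeSize, int32_t OverflowImm,
                           uint32_t ICmpImm) {
  uint64_t Total = static_cast<uint64_t>(ICmpImm) +
                   static_cast<uint64_t>(std::abs(OverflowImm));
  uint64_t Max = (1ull << TypeSize) - 1;  // e.g. 255 for i8
  return Total <= Max;
}

int main() {
  // %sub = sub i8 %a, 2 ; icmp ule i8 %sub, 254  -> 254 + 2 = 256 > 255: unsafe.
  assert(!safeUnsignedUnderflow(8, 2, 254));
  // %sub = sub i8 %a, 1 ; icmp ule i8 %sub, 254  -> 254 + 1 = 255 <= 255: safe.
  assert(safeUnsignedUnderflow(8, 1, 254));
  return 0;
}
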
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 2c4738d3cb74..5e97c4cb35e3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1420,6 +1420,22 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
MI = LastIT;
}
+ // Avoid splitting a MOVW+MOVT pair with a relocation on Windows.
+ // On Windows, this instruction pair is covered by a single
+ // IMAGE_REL_ARM_MOV32T relocation that spans both instructions. If a
+ // constant island is injected in between them, the relocation will clobber
+ // the instruction and fail to update the MOVT instruction.
+ // (These instructions are bundled up until right before the ConstantIslands
+ // pass.)
+ if (STI->isTargetWindows() && isThumb && MI->getOpcode() == ARM::t2MOVTi16 &&
+ (MI->getOperand(2).getTargetFlags() & ARMII::MO_OPTION_MASK) ==
+ ARMII::MO_HI16) {
+ --MI;
+ assert(MI->getOpcode() == ARM::t2MOVi16 &&
+ (MI->getOperand(1).getTargetFlags() & ARMII::MO_OPTION_MASK) ==
+ ARMII::MO_LO16);
+ }
+
// We really must not split an IT block.
LLVM_DEBUG(unsigned PredReg; assert(
!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL));
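
The constant-island hunk above backs the split point up by one instruction when it would otherwise land between a MOVW/MOVT pair that shares a single IMAGE_REL_ARM_MOV32T relocation. A toy model of that adjustment, using stand-in opcodes and flag values rather than the real MC definitions, is sketched below.

// Toy model of the split-point adjustment; opcodes and flags are stand-ins.
#include <cassert>
#include <cstddef>
#include <vector>

enum Opcode { OTHER, T2MOVI16, T2MOVTI16 };
enum Flag { LO16 = 1, HI16 = 2 };

struct Inst {
  Opcode Op;
  unsigned Flags;  // assume: target flags of the symbol operand
};

// If the candidate split point sits on the MOVT half of a MOVW+MOVT pair that
// shares one relocation, move it back so the island is not injected between them.
std::size_t adjustSplitPoint(const std::vector<Inst> &MBB, std::size_t Split) {
  if (MBB[Split].Op == T2MOVTI16 && (MBB[Split].Flags & HI16)) {
    assert(Split > 0 && MBB[Split - 1].Op == T2MOVI16 &&
           (MBB[Split - 1].Flags & LO16) && "expected preceding MOVW");
    return Split - 1;
  }
  return Split;
}

int main() {
  std::vector<Inst> MBB = {{OTHER, 0}, {T2MOVI16, LO16}, {T2MOVTI16, HI16}};
  assert(adjustSplitPoint(MBB, 2) == 1);  // island now goes in front of the pair
  assert(adjustSplitPoint(MBB, 0) == 0);  // unrelated instruction: unchanged
  return 0;
}
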
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 5dac6ec0b799..eecd0a10dc7d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -570,7 +570,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
}
@@ -645,7 +645,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
}
@@ -735,7 +735,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
}
@@ -848,8 +848,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
- LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.cloneMemRefs(MI);
+ HI16.cloneMemRefs(MI);
LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
if (isCC)
@@ -899,8 +899,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
}
}
- LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.cloneMemRefs(MI);
+ HI16.cloneMemRefs(MI);
LO16.addImm(Pred).addReg(PredReg);
HI16.addImm(Pred).addReg(PredReg);
@@ -1030,10 +1030,10 @@ static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
if (IsThumb) {
unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
- MIB.addReg(RegLo, Flags | getKillRegState(Reg.isDead()));
- MIB.addReg(RegHi, Flags | getKillRegState(Reg.isDead()));
+ MIB.addReg(RegLo, Flags);
+ MIB.addReg(RegHi, Flags);
} else
- MIB.addReg(Reg.getReg(), Flags | getKillRegState(Reg.isDead()));
+ MIB.addReg(Reg.getReg(), Flags);
}
/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
@@ -1103,7 +1103,8 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
// bne .Lloadcmp
unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
- addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
+ unsigned Flags = getKillRegState(New.isDead());
+ addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
@@ -1425,7 +1426,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.addExternalSymbol("__aeabi_read_tp", 0);
}
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
@@ -1440,7 +1441,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
.add(MI.getOperand(1))
.add(predOps(ARMCC::AL));
- MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB1.cloneMemRefs(MI);
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -1544,7 +1545,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
if (isARM) {
MIB3.add(predOps(ARMCC::AL));
if (Opcode == ARM::MOV_ga_pcrel_ldr)
- MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB3.cloneMemRefs(MI);
}
TransferImpOps(MI, MIB1, MIB3);
MI.eraseFromParent();
@@ -1596,7 +1597,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return true;
}
@@ -1629,7 +1630,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return true;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index a66cd7053c0a..a50abfdbee44 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -2951,7 +2951,8 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned ResultReg = MI->getOperand(0).getReg();
if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
return true;
}
@@ -2970,12 +2971,16 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
unsigned ConstAlign =
MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
+ MachineMemOperand *CPMMO =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+ MachineMemOperand::MOLoad, 4, 4);
unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
- .addConstantPoolIndex(Idx);
+ .addConstantPoolIndex(Idx)
+ .addMemOperand(CPMMO);
if (Opc == ARM::LDRcp)
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
@@ -2988,6 +2993,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(TempReg)
.addImm(ARMPCLabelIndex);
+
if (!Subtarget->isThumb())
MIB.add(predOps(ARMCC::AL));
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 56ad7a0f0446..a9d87ced31f3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -79,12 +79,11 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
: TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
STI(sti) {}
-bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
+bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
// iOS always has a FP for backtracking, force other targets to keep their FP
// when doing FastISel. The emitted code is currently superior, and in cases
// like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
- return TargetFrameLowering::noFramePointerElim(MF) ||
- MF.getSubtarget<ARMSubtarget>().useFastISel();
+ return MF.getSubtarget<ARMSubtarget>().useFastISel();
}
/// Returns true if the target can safely skip saving callee-saved registers
@@ -526,6 +525,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
switch (TM.getCodeModel()) {
+ case CodeModel::Tiny:
+ llvm_unreachable("Tiny code model not available on ARM.");
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
@@ -909,6 +910,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
assert(RegInfo->hasBasePointer(MF) &&
"VLAs and dynamic stack alignment, but missing base pointer!");
FrameReg = RegInfo->getBaseRegister();
+ Offset -= SPAdj;
}
return Offset;
}
@@ -1006,8 +1008,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
if (Regs.empty())
continue;
- llvm::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS,
- const RegAndKill &RHS) {
+ llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
});
@@ -1103,7 +1104,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Regs.empty())
continue;
- llvm::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) {
+ llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
});
@@ -1921,9 +1922,13 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
<< "\n");
}
+ // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
+ // restore LR in that case.
+ bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
+
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
// Spill LR as well so we can fold BX_RET to the registers restore (LDM).
- if (!LRSpilled && CS1Spilled) {
+ if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
SavedRegs.set(ARM::LR);
NumGPRSpills++;
SmallVectorImpl<unsigned>::iterator LRPos;
@@ -1949,7 +1954,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Windows on ARM, accept R11 (frame pointer)
if (!AFI->isThumbFunction() ||
(STI.isTargetWindows() && Reg == ARM::R11) ||
- isARMLowRegister(Reg) || Reg == ARM::LR) {
+ isARMLowRegister(Reg) ||
+ (Reg == ARM::LR && !ExpensiveLRRestore)) {
SavedRegs.set(Reg);
LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
<< " to make up alignment\n");
@@ -2151,9 +2157,15 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Do not generate a prologue for leaf functions with a stack of size zero.
// For non-leaf functions we have to allow for the possibility that the
- // call is to a non-split function, as in PR37807.
- if (StackSize == 0 && !MFI.hasTailCall())
+ // call is to a non-split function, as in PR37807. This function could also
+ // take the address of a non-split function. When the linker tries to adjust
+ // its non-existent prologue, it would fail with an error. Mark the object
+ // file so that such failures are not errors. See this Go language bug-report
+ // https://go-review.googlesource.com/c/go/+/148819/
+ if (StackSize == 0 && !MFI.hasTailCall()) {
+ MF.getMMI().setHasNosplitStack(true);
return;
+ }
// Use R4 and R5 as scratch registers.
// We save R4 and R5 before use and restore them before leaving the function.
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
index e994cab28fe7..2f7e23840e75 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -42,7 +42,7 @@ public:
std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override;
- bool noFramePointerElim(const MachineFunction &MF) const override;
+ bool keepFramePointer(const MachineFunction &MF) const override;
bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9592dd53c347..8e0e82388251 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1345,9 +1345,8 @@ static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
}
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
@@ -1764,12 +1763,14 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
default: llvm_unreachable("unhandled vld type");
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4f16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
case MVT::v1i64: OpcodeIndex = 3; break;
// Quad-register operations:
case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8f16:
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
@@ -1854,9 +1855,8 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
}
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
if (NumVecs == 1) {
ReplaceNode(N, VLd);
@@ -1893,8 +1893,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
SDValue Chain = N->getOperand(0);
EVT VT = N->getOperand(Vec0Idx).getValueType();
@@ -1983,7 +1982,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
- cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
ReplaceNode(N, VSt);
return;
@@ -2007,7 +2006,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
MemAddr.getValueType(),
MVT::Other, OpsA);
- cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
Chain = SDValue(VStA, 1);
// Store the odd D registers.
@@ -2026,7 +2025,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Chain);
SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
Ops);
- cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
ReplaceNode(N, VStB);
}
@@ -2045,8 +2044,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
SDValue Chain = N->getOperand(0);
unsigned Lane =
@@ -2135,7 +2133,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
QOpcodes[OpcodeIndex]);
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
if (!IsLoad) {
ReplaceNode(N, VLdLn);
return;
@@ -2264,9 +2262,8 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
}
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
// Extract the subregisters.
if (NumVecs == 1) {
@@ -2309,6 +2306,11 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ // Mask off the unnecessary bits of the AND immediate; normally
+ // DAGCombine will do this, but that might not happen if
+ // targetShrinkDemandedConstant chooses a different immediate.
+ And_imm &= -1U >> Srl_imm;
+
// Note: The width operand is encoded as width-1.
unsigned Width = countTrailingOnes(And_imm) - 1;
unsigned LSB = Srl_imm;
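A standalone sanity check (not part of the patch, hypothetical file) of the identity the new `And_imm &= -1U >> Srl_imm` line relies on: only the low 32 - Srl_imm bits of the shifted value can be nonzero, so trimming the higher bits of the AND immediate never changes the result and leaves countTrailingOnes with the real bitfield width.

// Standalone check (not LLVM code): trimming the AND immediate to the bits
// that can survive a logical right shift never changes the result.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t xs[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu};
  const uint32_t masks[] = {0xFFu, 0xFFFu, 0x3FFFFFu, 0xF0F0F0F0u};
  for (uint32_t x : xs)
    for (uint32_t mask : masks)
      for (unsigned srl = 1; srl < 32; ++srl) {
        uint32_t trimmed = mask & (~0u >> srl); // what the new line does
        assert(((x >> srl) & mask) == ((x >> srl) & trimmed));
      }
  std::puts("ok");
  return 0;
}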
@@ -2476,9 +2478,8 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
Opcode, SDLoc(N),
CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
@@ -2627,12 +2628,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// queries work properly. This e.g. gives the register allocation the
// required information for rematerialization.
MachineFunction& MF = CurDAG->getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = MF.getMachineMemOperand(
- MachinePointerInfo::getConstantPool(MF),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *MemOp =
+ MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+ MachineMemOperand::MOLoad, 4, 4);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
ReplaceNode(N, ResNode);
return;
@@ -3030,11 +3030,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VZIPd8; break;
+ case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VZIPq8; break;
+ case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VZIPq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
@@ -3051,11 +3053,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VUZPd8; break;
+ case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VUZPq8; break;
+ case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VUZPq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
@@ -3072,10 +3076,12 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VTRNd8; break;
+ case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VTRNd16; break;
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VTRNq8; break;
+ case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VTRNq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
@@ -3410,9 +3416,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getRegister(0, MVT::i32), Chain};
SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
// Remap uses.
SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
@@ -3478,9 +3483,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
return;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ede276dd91bb..21de0f6a7630 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -651,9 +651,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// it have a FP_TO_[SU]INT instruction with a narrower destination than
// source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
@@ -665,8 +669,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
- setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
- setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
+ setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
@@ -846,8 +850,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
+ }
// @llvm.readcyclecounter requires the Performance Monitors extension.
// Default to the 0 expansion on unsupported platforms.
@@ -950,6 +956,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// Use the default implementation.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -977,7 +984,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
- if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
+ if (!Subtarget->hasAcquireRelease() ||
+ getTargetMachine().getOptLevel() == 0) {
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
InsertFencesForAtomic = true;
}
@@ -1136,14 +1144,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasNEON()) {
// vmin and vmax aren't available in a scalar form, so we use
// a NEON instruction with an undef lane instead.
- setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
+
+ if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
+
+ setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
+ }
}
// We have target-specific dag combine patterns for the following nodes:
@@ -1181,6 +1201,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
+ setPrefLoopAlignment(Subtarget->getPrefLoopAlignment());
+
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -1261,6 +1283,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::SUBS: return "ARMISD::SUBS";
case ARMISD::SSAT: return "ARMISD::SSAT";
case ARMISD::USAT: return "ARMISD::USAT";
@@ -3052,41 +3075,8 @@ static bool allUsersAreInFunction(const Value *V, const Function *F) {
return true;
}
-/// Return true if all users of V are within some (any) function, looking through
-/// ConstantExprs. In other words, are there any global constant users?
-static bool allUsersAreInFunctions(const Value *V) {
- SmallVector<const User*,4> Worklist;
- for (auto *U : V->users())
- Worklist.push_back(U);
- while (!Worklist.empty()) {
- auto *U = Worklist.pop_back_val();
- if (isa<ConstantExpr>(U)) {
- for (auto *UU : U->users())
- Worklist.push_back(UU);
- continue;
- }
-
- if (!isa<Instruction>(U))
- return false;
- }
- return true;
-}
-
-// Return true if T is an integer, float or an array/vector of either.
-static bool isSimpleType(Type *T) {
- if (T->isIntegerTy() || T->isFloatingPointTy())
- return true;
- Type *SubT = nullptr;
- if (T->isArrayTy())
- SubT = T->getArrayElementType();
- else if (T->isVectorTy())
- SubT = T->getVectorElementType();
- else
- return false;
- return SubT->isIntegerTy() || SubT->isFloatingPointTy();
-}
-
-static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
+static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
+ const GlobalValue *GV, SelectionDAG &DAG,
EVT PtrVT, const SDLoc &dl) {
// If we're creating a pool entry for a constant global with unnamed address,
// and the global is small enough, we can emit it inline into the constant pool
@@ -3113,11 +3103,11 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
!GVar->hasLocalLinkage())
return SDValue();
- // Ensure that we don't try and inline any type that contains pointers. If
- // we inline a value that contains relocations, we move the relocations from
- // .data to .text which is not ideal.
+ // If we inline a value that contains relocations, we move the relocations
+ // from .data to .text. This is not allowed in position-independent code.
auto *Init = GVar->getInitializer();
- if (!isSimpleType(Init->getType()))
+ if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
+ Init->needsRelocation())
return SDValue();
// The constant islands pass can only really deal with alignment requests
@@ -3128,7 +3118,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
// that are strings for simplicity.
auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
- unsigned Align = GVar->getAlignment();
+ unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
unsigned RequiredPadding = 4 - (Size % 4);
bool PaddingPossible =
RequiredPadding == 4 || (CDAInit && CDAInit->isString());
@@ -3149,12 +3139,14 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
ConstpoolPromotionMaxTotal)
return SDValue();
- // This is only valid if all users are in a single function OR it has users
- // in multiple functions but it no larger than a pointer. We also check if
- // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
- // address taken.
- if (!allUsersAreInFunction(GVar, &F) &&
- !(Size <= 4 && allUsersAreInFunctions(GVar)))
+ // This is only valid if all users are in a single function; we can't clone
+ // the constant in general. The LLVM IR unnamed_addr allows merging
+ // constants, but not cloning them.
+ //
+ // We could potentially allow cloning if we could prove all uses of the
+ // constant in the current function don't care about the address, like
+ // printf format strings. But that isn't implemented for now.
+ if (!allUsersAreInFunction(GVar, &F))
return SDValue();
// We're going to inline this global. Pad it out if needed.
@@ -3182,9 +3174,11 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->getBaseObject();
- return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
- isa<Function>(GV);
+ if (!(GV = GA->getBaseObject()))
+ return false;
+ if (const auto *V = dyn_cast<GlobalVariable>(GV))
+ return V->isConstant();
+ return isa<Function>(GV);
}
SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -3210,7 +3204,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
// promoteToConstantPool only if not generating XO text section
if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
- if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
+ if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
return V;
if (isPositionIndependent()) {
@@ -3299,9 +3293,13 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported for Windows");
+ const TargetMachine &TM = getTargetMachine();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- const ARMII::TOF TargetFlags =
- (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
+ ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
+ if (GV->hasDLLImportStorageClass())
+ TargetFlags = ARMII::MO_DLLIMPORT;
+ else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ TargetFlags = ARMII::MO_COFFSTUB;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
SDLoc DL(Op);
@@ -3313,7 +3311,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
TargetFlags));
- if (GV->hasDLLImportStorageClass())
+ if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
@@ -3412,7 +3410,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
Op.getOperand(1), Op.getOperand(2));
}
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
- ? ISD::FMINNAN : ISD::FMAXNAN;
+ ? ISD::FMINIMUM : ISD::FMAXIMUM;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
@@ -4832,12 +4830,24 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
return DAG.UnrollVectorOp(Op.getNode());
}
- assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
- "Invalid type for custom lowering!");
- if (VT != MVT::v4i16)
+ const bool HasFullFP16 =
+ static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+
+ EVT NewTy;
+ const EVT OpTy = Op.getOperand(0).getValueType();
+ if (OpTy == MVT::v4f32)
+ NewTy = MVT::v4i32;
+ else if (OpTy == MVT::v4f16 && HasFullFP16)
+ NewTy = MVT::v4i16;
+ else if (OpTy == MVT::v8f16 && HasFullFP16)
+ NewTy = MVT::v8i16;
+ else
+ llvm_unreachable("Invalid type for custom lowering!");
+
+ if (VT != MVT::v4i16 && VT != MVT::v8i16)
return DAG.UnrollVectorOp(Op.getNode());
- Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
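As a scalar illustration of the path just added (a sketch with made-up helper names, not part of the patch): the conversion is performed at i32 element width and then truncated, which agrees with a direct fp-to-i16 conversion whenever the value is representable in 16 bits.

// Scalar model (not LLVM code): converting fp -> i16 by going through i32
// and truncating, which is what the custom lowering above does per element.
#include <cassert>
#include <cstdint>
#include <cstdio>

static int16_t viaI32(float f) { return (int16_t)(int32_t)f; } // convert wide, then truncate
static int16_t direct(float f) { return (int16_t)f; }          // what the IR asked for

int main() {
  const float in[] = {0.0f, 1.5f, -3.9f, 32000.25f, -32768.0f};
  for (float f : in)
    assert(viaI32(f) == direct(f)); // agrees whenever the value fits in i16
  std::puts("ok");
  return 0;
}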
@@ -4870,9 +4880,21 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return DAG.UnrollVectorOp(Op.getNode());
}
- assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
+ assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
+ Op.getOperand(0).getValueType() == MVT::v8i16) &&
"Invalid type for custom lowering!");
- if (VT != MVT::v4f32)
+
+ const bool HasFullFP16 =
+ static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+
+ EVT DestVecType;
+ if (VT == MVT::v4f32)
+ DestVecType = MVT::v4i32;
+ else if (VT == MVT::v4f16 && HasFullFP16)
+ DestVecType = MVT::v4i16;
+ else if (VT == MVT::v8f16 && HasFullFP16)
+ DestVecType = MVT::v8i16;
+ else
return DAG.UnrollVectorOp(Op.getNode());
unsigned CastOpc;
@@ -4889,7 +4911,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
break;
}
- Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
return DAG.getNode(Opc, dl, VT, Op);
}
@@ -5392,10 +5414,6 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
// Compute with: cttz(x) = ctpop(lsb - 1)
- // Since we can only compute the number of bits in a byte with vcnt.8, we
- // have to gather the result with pairwise addition (vpaddl) for i16, i32,
- // and i64.
-
// Compute LSB - 1.
SDValue Bits;
if (ElemTy == MVT::i64) {
@@ -5408,32 +5426,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
DAG.getTargetConstant(1, dl, ElemTy));
Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
}
-
- // Count #bits with vcnt.8.
- EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
- SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
- SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
-
- // Gather the #bits with vpaddl (pairwise add.)
- EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
- SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
- DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
- Cnt8);
- if (ElemTy == MVT::i16)
- return Cnt16;
-
- EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
- SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
- DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
- Cnt16);
- if (ElemTy == MVT::i32)
- return Cnt32;
-
- assert(ElemTy == MVT::i64);
- SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
- Cnt32);
- return Cnt64;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
}
if (!ST->hasV6T2Ops())
@@ -5443,112 +5436,37 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
-/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
-/// for each 16-bit element from operand, repeated. The basic idea is to
-/// leverage vcnt to get the 8-bit counts, gather and add the results.
-///
-/// Trace for v4i16:
-/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
-/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
-/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
-/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
-/// [b0 b1 b2 b3 b4 b5 b6 b7]
-/// +[b1 b0 b3 b2 b5 b4 b7 b6]
-/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
-/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
-static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
-
- EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
- SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
- SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
- SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
- SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
- return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
-}
-
-/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
-/// bit-count for each 16-bit element from the operand. We need slightly
-/// different sequencing for v4i16 and v8i16 to stay within NEON's available
-/// 64/128-bit registers.
-///
-/// Trace for v4i16:
-/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
-/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
-/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
-/// v4i16:Extracted = [k0 k1 k2 k3 ]
-static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
+static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
- if (VT.is64BitVector()) {
- SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
- DAG.getIntPtrConstant(0, DL));
- } else {
- SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
- BitCounts, DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
- }
-}
-
-/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
-/// bit-count for each 32-bit element from the operand. The idea here is
-/// to split the vector into 16-bit elements, leverage the 16-bit count
-/// routine, and then combine the results.
-///
-/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
-/// input = [v0 v1 ] (vi: 32-bit elements)
-/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
-/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
-/// vrev: N0 = [k1 k0 k3 k2 ]
-/// [k0 k1 k2 k3 ]
-/// N1 =+[k1 k0 k3 k2 ]
-/// [k0 k2 k1 k3 ]
-/// N2 =+[k1 k3 k0 k2 ]
-/// [k0 k2 k1 k3 ]
-/// Extended =+[k1 k3 k0 k2 ]
-/// [k0 k2 ]
-/// Extracted=+[k1 k3 ]
-///
-static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
+ assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
+ assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
+ VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
- EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
+ Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
- SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
- SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
- SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
- SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
- SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
+ // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
+ unsigned EltSize = 8;
+ unsigned NumElts = VT.is64BitVector() ? 8 : 16;
+ while (EltSize != VT.getScalarSizeInBits()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Ops.push_back(Res);
- if (VT.is64BitVector()) {
- SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
- DAG.getIntPtrConstant(0, DL));
- } else {
- SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
- DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
+ EltSize *= 2;
+ NumElts /= 2;
+ MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
}
-}
-static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
- EVT VT = N->getValueType(0);
-
- assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
- assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
- VT == MVT::v4i16 || VT == MVT::v8i16) &&
- "Unexpected type for custom ctpop lowering");
-
- if (VT.getVectorElementType() == MVT::i32)
- return lowerCTPOP32BitElements(N, DAG);
- else
- return lowerCTPOP16BitElements(N, DAG);
+ return Res;
}
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
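A scalar model of the new LowerCTPOP strategy (hypothetical helper names, not LLVM or NEON code): count bits per byte the way vcnt.8 would, then keep adding adjacent pairs while doubling the element width, which is the job arm_neon_vpaddlu does in the loop above. The last assert also exercises the cttz identity the earlier hunk feeds into this path, cttz(x) = ctpop((x & -x) - 1).

// Scalar model (not LLVM/NEON code) of the widening-ctpop loop above.
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t popcountPerByte(uint64_t v) { // models vcnt.8 on a 64-bit lane
  uint64_t out = 0;
  for (int i = 0; i < 8; ++i)
    out |= (uint64_t)__builtin_popcount((unsigned)((v >> (8 * i)) & 0xFF)) << (8 * i);
  return out;
}

// Models vpaddl.u<eltBits>: add adjacent pairs, doubling the element width.
static uint64_t pairwiseAddWiden(uint64_t v, unsigned eltBits) {
  uint64_t out = 0;
  uint64_t mask = (1ull << eltBits) - 1;
  for (unsigned i = 0; i < 64 / (2 * eltBits); ++i) {
    uint64_t lo = (v >> (2 * i * eltBits)) & mask;
    uint64_t hi = (v >> ((2 * i + 1) * eltBits)) & mask;
    out |= (lo + hi) << (2 * i * eltBits);
  }
  return out;
}

static uint64_t ctpopPerElement(uint64_t v, unsigned eltBits) {
  uint64_t res = popcountPerByte(v);
  for (unsigned sz = 8; sz != eltBits; sz *= 2) // the widening loop in the hunk
    res = pairwiseAddWiden(res, sz);
  return res;
}

int main() {
  const uint64_t v = 0xF00F0F0FDEADBEEFull;
  assert(ctpopPerElement(v, 64) == (uint64_t)__builtin_popcountll(v));
  assert((uint32_t)ctpopPerElement(v, 32) ==
         (uint32_t)__builtin_popcount((uint32_t)v));
  // The cttz path above hands ctpop the value (x & -x) - 1.
  const uint64_t ys[] = {1, 2, 0x8000, v};
  for (uint64_t y : ys)
    assert(__builtin_popcountll((y & -y) - 1) == __builtin_ctzll(y));
  std::puts("ok");
  return 0;
}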
@@ -7878,6 +7796,50 @@ SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
return LowerCallTo(CLI).first;
}
+// This is a code size optimisation: return the original SDIV node to
+// DAGCombiner when we don't want to expand SDIV into a sequence of
+// instructions, and an empty node otherwise which will cause the
+// SDIV to be expanded in DAGCombine.
+SDValue
+ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // TODO: Support SREM
+ if (N->getOpcode() != ISD::SDIV)
+ return SDValue();
+
+ const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
+ const auto &MF = DAG.getMachineFunction();
+ const bool MinSize = MF.getFunction().optForMinSize();
+ const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
+ : ST.hasDivideInARMMode();
+
+ // Don't touch vector types; rewriting this may lead to scalarizing
+ // the int divs.
+ if (N->getOperand(0).getValueType().isVector())
+ return SDValue();
+
+ // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
+ // hwdiv support for this to be really profitable.
+ if (!(MinSize && HasDivide))
+ return SDValue();
+
+ // ARM mode is a bit simpler than Thumb: we can handle large power
+ // of 2 immediates with 1 mov instruction; no further checks required,
+ // just return the sdiv node.
+ if (!ST.isThumb())
+ return SDValue(N, 0);
+
+ // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
+ // and thus lose the code size benefits of a MOVS that requires only 2.
+ // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
+ // but as it's doing exactly this, it's not worth the trouble to get TTI.
+ if (Divisor.sgt(128))
+ return SDValue();
+
+ return SDValue(N, 0);
+}
+
SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
assert(Op.getValueType() == MVT::i32 &&
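For contrast, a standalone sketch (hypothetical helper name, not LLVM code) of the generic signed divide-by-power-of-two expansion DAGCombine falls back to when this hook returns an empty SDValue; keeping the raw SDIV under minsize with hardware divide trades this add/shift sequence for a single instruction. The sketch assumes an arithmetic right shift of negative int32_t, which holds on mainstream compilers.

// Sketch (not LLVM code): the usual sdiv-by-2^k expansion, rounding toward
// zero by adding (2^k - 1) to negative dividends before the arithmetic shift.
#include <cassert>
#include <cstdint>
#include <cstdio>

static int32_t sdivPow2Expanded(int32_t x, unsigned k) { // divide by 1 << k
  int32_t bias = (x < 0) ? (int32_t)((1u << k) - 1) : 0;
  return (x + bias) >> k; // assumes >> of a negative value is arithmetic
}

int main() {
  const int32_t xs[] = {0, 1, -1, 7, -7, 100, -100, 1 << 30, -(1 << 30)};
  for (int32_t x : xs)
    for (unsigned k = 1; k <= 7; ++k) // small power-of-two divisors, 2..128
      assert(sdivPow2Expanded(x, k) == x / (1 << k));
  std::puts("ok");
  return 0;
}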
@@ -7990,10 +7952,8 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
ARM::CMP_SWAP_64, SDLoc(N),
DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
- MachineFunction &MF = DAG.getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
bool isBigEndian = DAG.getDataLayout().isBigEndian();
@@ -9169,6 +9129,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
// IP.
switch (TM.getCodeModel()) {
+ case CodeModel::Tiny:
+ llvm_unreachable("Tiny code model not available on ARM.");
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
@@ -9244,6 +9206,42 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
return ContBB;
}
+// The CPSR operand of SelectItr might be missing a kill marker
+// because there were multiple uses of CPSR, and ISel didn't know
+// which to mark. Figure out whether SelectItr should have had a
+// kill marker, and set it if it should. Returns the correct kill
+// marker value.
+static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
+ MachineBasicBlock* BB,
+ const TargetRegisterInfo* TRI) {
+ // Scan forward through BB for a use/def of CPSR.
+ MachineBasicBlock::iterator miI(std::next(SelectItr));
+ for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
+ const MachineInstr& mi = *miI;
+ if (mi.readsRegister(ARM::CPSR))
+ return false;
+ if (mi.definesRegister(ARM::CPSR))
+ break; // Should have kill-flag - update below.
+ }
+
+ // If we hit the end of the block, check whether CPSR is live into a
+ // successor.
+ if (miI == BB->end()) {
+ for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
+ sEnd = BB->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock* succ = *sItr;
+ if (succ->isLiveIn(ARM::CPSR))
+ return false;
+ }
+ }
+
+ // We found a def, or hit the end of the basic block and CPSR wasn't live
+ // out. SelectMI should have a kill flag on CPSR.
+ SelectItr->addRegisterKilled(ARM::CPSR, TRI);
+ return true;
+}
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -9343,6 +9341,14 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
+ // Check whether CPSR is live past the tMOVCCr_pseudo.
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ if (!MI.killsRegister(ARM::CPSR) &&
+ !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
+ copy0MBB->addLiveIn(ARM::CPSR);
+ sinkMBB->addLiveIn(ARM::CPSR);
+ }
+
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
@@ -10407,6 +10413,37 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
+bool
+ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const {
+ if (Level == BeforeLegalizeTypes)
+ return true;
+
+ if (Subtarget->isThumb() && Subtarget->isThumb1Only())
+ return true;
+
+ if (N->getOpcode() != ISD::SHL)
+ return true;
+
+ // Turn off commute-with-shift transform after legalization, so it doesn't
+ // conflict with PerformSHLSimplify. (We could try to detect when
+ // PerformSHLSimplify would trigger more precisely, but it isn't
+ // really necessary.)
+ return false;
+}
+
+bool
+ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const {
+ if (!Subtarget->isThumb1Only())
+ return true;
+
+ if (Level == BeforeLegalizeTypes)
+ return true;
+
+ return false;
+}
+
static SDValue PerformSHLSimplify(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
@@ -10506,9 +10543,7 @@ static SDValue PerformSHLSimplify(SDNode *N,
LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
SHL.dump(); N->dump());
LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
-
- DAG.ReplaceAllUsesWith(SDValue(N, 0), Res);
- return SDValue(N, 0);
+ return Res;
}
@@ -10712,6 +10747,12 @@ static SDValue CombineANDShift(SDNode *N,
if (!C2 || C2 >= 32)
return SDValue();
+ // Clear irrelevant bits in the mask.
+ if (LeftShift)
+ C1 &= (-1U << C2);
+ else
+ C1 &= (-1U >> C2);
+
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
@@ -10719,9 +10760,7 @@ static SDValue CombineANDShift(SDNode *N,
// "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
// transform to a pair of shifts, to save materializing c1.
- // First pattern: right shift, and c1+1 is a power of two.
- // FIXME: Also check reversed pattern (left shift, and ~c1+1 is a power
- // of two).
+ // First pattern: right shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (!LeftShift && isMask_32(C1)) {
uint32_t C3 = countLeadingZeros(C1);
@@ -10733,13 +10772,23 @@ static SDValue CombineANDShift(SDNode *N,
}
}
- // Second pattern: left shift, and (c1>>c2)+1 is a power of two.
- // FIXME: Also check reversed pattern (right shift, and ~(c1<<c2)+1
- // is a power of two).
+ // First pattern, reversed: left shift, then mask off trailing bits.
+ if (LeftShift && isMask_32(~C1)) {
+ uint32_t C3 = countTrailingZeros(C1);
+ if (C2 < C3) {
+ SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
+ DAG.getConstant(C3 - C2, DL, MVT::i32));
+ return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
+ DAG.getConstant(C3, DL, MVT::i32));
+ }
+ }
+
+ // Second pattern: left shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (LeftShift && isShiftedMask_32(C1)) {
+ uint32_t Trailing = countTrailingZeros(C1);
uint32_t C3 = countLeadingZeros(C1);
- if (C2 + C3 < 32 && C1 == ((-1U << (C2 + C3)) >> C3)) {
+ if (Trailing == C2 && C2 + C3 < 32) {
SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C2 + C3, DL, MVT::i32));
return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
@@ -10747,6 +10796,19 @@ static SDValue CombineANDShift(SDNode *N,
}
}
+ // Second pattern, reversed: right shift, then mask off trailing bits.
+ // FIXME: Handle other patterns of known/demanded bits.
+ if (!LeftShift && isShiftedMask_32(C1)) {
+ uint32_t Leading = countLeadingZeros(C1);
+ uint32_t C3 = countTrailingZeros(C1);
+ if (Leading == C2 && C2 + C3 < 32) {
+ SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
+ DAG.getConstant(C2 + C3, DL, MVT::i32));
+ return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
+ DAG.getConstant(C3, DL, MVT::i32));
+ }
+ }
+
// FIXME: Transform "(and (shl x, c2) c1)" ->
// "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
// c1.
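The patterns added in this hunk rest on plain bit identities; below is a brute-force standalone check (not part of the patch) over all applicable C2/C3 values, writing out explicitly the masks that the code detects with isMask_32 and isShiftedMask_32.

// Standalone check (not LLVM code) of the shift-pair rewrites in this hunk:
// "shift, then AND with a (shifted) mask" becomes two shifts, so the mask
// constant never has to be materialised.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t xs[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t x : xs) {
    // Left shift, then mask off trailing bits:
    //   (x << C2) & (~0u << C3)  ==  (x >> (C3 - C2)) << C3,  for C2 < C3.
    for (unsigned C3 = 1; C3 < 32; ++C3)
      for (unsigned C2 = 0; C2 < C3; ++C2)
        assert(((x << C2) & (~0u << C3)) == ((x >> (C3 - C2)) << C3));

    // Left shift, then a shifted mask whose trailing-zero count equals C2:
    //   (x << C2) & ((~0u >> (C2 + C3)) << C2)
    //     ==  (x << (C2 + C3)) >> C3,  for C2 + C3 < 32.
    for (unsigned C2 = 0; C2 < 32; ++C2)
      for (unsigned C3 = 0; C2 + C3 < 32; ++C3) {
        uint32_t C1 = (~0u >> (C2 + C3)) << C2;
        assert(((x << C2) & C1) == ((x << (C2 + C3)) >> C3));
      }

    // Right shift, then a shifted mask whose leading-zero count equals C2:
    //   (x >> C2) & ((~0u << (C2 + C3)) >> C2)
    //     ==  (x >> (C2 + C3)) << C3,  for C2 + C3 < 32.
    for (unsigned C2 = 0; C2 < 32; ++C2)
      for (unsigned C3 = 0; C2 + C3 < 32; ++C3) {
        uint32_t C1 = (~0u << (C2 + C3)) >> C2;
        assert(((x >> C2) & C1) == ((x >> (C2 + C3)) << C3));
      }
  }
  std::puts("ok");
  return 0;
}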
@@ -11541,8 +11603,15 @@ static SDValue CombineBaseUpdate(SDNode *N,
continue;
// Check that the add is independent of the load/store. Otherwise, folding
- // it would create a cycle.
- if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ // it would create a cycle. We can avoid searching through Addr as it's a
+ // predecessor to both.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Addr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(User);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
@@ -12507,8 +12576,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
// Lastly, can we determine that the bits defined by OrCI
// are zero in Y?
- KnownBits Known;
- DAG.computeKnownBits(Y, Known);
+ KnownBits Known = DAG.computeKnownBits(Y);
if ((OrCI & Known.Zero) != OrCI)
return SDValue();
@@ -12679,30 +12747,38 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
}
- } else if (CC == ARMCC::NE && LHS != RHS &&
+ } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
(!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
// This seems pointless but will allow us to combine it further below.
- // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
+ // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
+ SDValue Sub =
+ DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+ SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+ Sub.getValue(1), SDValue());
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
- N->getOperand(3), Cmp);
+ N->getOperand(3), CPSRGlue.getValue(1));
+ FalseVal = Sub;
}
} else if (isNullConstant(TrueVal)) {
- if (CC == ARMCC::EQ && LHS != RHS &&
+ if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
(!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
// This seems pointless but will allow us to combine it further below
// Note that we change == for != as this is the dual for the case above.
- // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
+ // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
+ SDValue Sub =
+ DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+ SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+ Sub.getValue(1), SDValue());
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
DAG.getConstant(ARMCC::NE, dl, MVT::i32),
- N->getOperand(3), Cmp);
+ N->getOperand(3), CPSRGlue.getValue(1));
+ FalseVal = Sub;
}
}
// On Thumb1, the DAG above may be further combined if z is a power of 2
// (z == 2 ^ K).
- // CMOV (SUB x, y), z, !=, (CMPZ x, y) ->
+ // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
// merge t3, t4
// where t1 = (SUBCARRY (SUB x, y), z, 0)
// t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
@@ -12710,8 +12786,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ]
const APInt *TrueConst;
if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
- (FalseVal.getOpcode() == ISD::SUB) && (FalseVal.getOperand(0) == LHS) &&
- (FalseVal.getOperand(1) == RHS) &&
+ (FalseVal.getOpcode() == ARMISD::SUBS) &&
+ (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) &&
(TrueConst = isPowerOf2Constant(TrueVal))) {
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
unsigned ShiftAmount = TrueConst->logBase2();
@@ -12730,8 +12806,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
}
if (Res.getNode()) {
- KnownBits Known;
- DAG.computeKnownBits(SDValue(N,0), Known);
+ KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
// Capture demanded bits information that would be otherwise lost.
if (Known.Zero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
@@ -13522,12 +13597,11 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
if (Known.isUnknown())
return;
- KnownBits KnownRHS;
- DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1);
+ KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
Known.Zero &= KnownRHS.Zero;
Known.One &= KnownRHS.One;
return;
@@ -13549,7 +13623,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case ARMISD::BFI: {
// Conservatively, we can recurse down the first operand
// and just mask out all affected bits.
- DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
+ Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// The operand to BFI is already a mask suitable for removing the bits it
// sets.
@@ -13559,9 +13633,120 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.One &= Mask;
return;
}
+ case ARMISD::VGETLANEs:
+ case ARMISD::VGETLANEu: {
+ const SDValue &SrcSV = Op.getOperand(0);
+ EVT VecVT = SrcSV.getValueType();
+ assert(VecVT.isVector() && "VGETLANE expected a vector type");
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+ ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
+ assert(Pos->getAPIntValue().ult(NumSrcElts) &&
+ "VGETLANE index out of bounds");
+ unsigned Idx = Pos->getZExtValue();
+ APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
+ Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
+
+ EVT VT = Op.getValueType();
+ const unsigned DstSz = VT.getScalarSizeInBits();
+ const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
+ assert(SrcSz == Known.getBitWidth());
+ assert(DstSz > SrcSz);
+ if (Op.getOpcode() == ARMISD::VGETLANEs)
+ Known = Known.sext(DstSz);
+ else {
+ Known = Known.zext(DstSz);
+ Known.Zero.setBitsFrom(SrcSz);
+ }
+ assert(DstSz == Known.getBitWidth());
+ break;
+ }
}
}
+bool
+ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
+ const APInt &DemandedAPInt,
+ TargetLoweringOpt &TLO) const {
+ // Delay optimization, so we don't have to deal with illegal types, or block
+ // optimizations.
+ if (!TLO.LegalOps)
+ return false;
+
+ // Only optimize AND for now.
+ if (Op.getOpcode() != ISD::AND)
+ return false;
+
+ EVT VT = Op.getValueType();
+
+ // Ignore vectors.
+ if (VT.isVector())
+ return false;
+
+ assert(VT == MVT::i32 && "Unexpected integer type");
+
+ // Make sure the RHS really is a constant.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C)
+ return false;
+
+ unsigned Mask = C->getZExtValue();
+
+ unsigned Demanded = DemandedAPInt.getZExtValue();
+ unsigned ShrunkMask = Mask & Demanded;
+ unsigned ExpandedMask = Mask | ~Demanded;
+
+ // If the mask is all zeros, let the target-independent code replace the
+ // result with zero.
+ if (ShrunkMask == 0)
+ return false;
+
+ // If the mask is all ones, erase the AND. (Currently, the target-independent
+ // code won't do this, so we have to do it explicitly to avoid an infinite
+ // loop in obscure cases.)
+ if (ExpandedMask == ~0U)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+ return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+ };
+ auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+ if (NewMask == Mask)
+ return true;
+ SDLoc DL(Op);
+ SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+ SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
+ };
+
+ // Prefer uxtb mask.
+ if (IsLegalMask(0xFF))
+ return UseMask(0xFF);
+
+ // Prefer uxth mask.
+ if (IsLegalMask(0xFFFF))
+ return UseMask(0xFFFF);
+
+ // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
+ // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+ if (ShrunkMask < 256)
+ return UseMask(ShrunkMask);
+
+ // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
+ // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+ if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+ return UseMask(ExpandedMask);
+
+ // Potential improvements:
+ //
+ // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
+ // We could try to prefer Thumb1 immediates which can be lowered to a
+ // two-instruction sequence.
+ // We could try to recognize more legal ARM/Thumb2 immediates here.
+
+ return false;
+}
+
+
//===----------------------------------------------------------------------===//
// ARM Inline Assembly Support
//===----------------------------------------------------------------------===//
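The new targetShrinkDemandedConstant hook only narrows an AND mask to immediates ARM/Thumb can encode cheaply, preferring 0xFF (uxtb) and 0xFFFF (uxth). A sketch of that selection policy in isolation (hypothetical helper name, not LLVM code), using the same ShrunkMask/ExpandedMask bounds as the hunk: any mask containing every bit of ShrunkMask and no bit outside ExpandedMask is a legal replacement.

// Standalone sketch (not LLVM code) of the mask-selection policy above.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <optional>

static std::optional<uint32_t> pickMask(uint32_t Mask, uint32_t Demanded) {
  uint32_t Shrunk = Mask & Demanded;
  uint32_t Expanded = Mask | ~Demanded;
  if (Shrunk == 0)
    return std::nullopt;                 // whole result is known zero
  if (Expanded == ~0u)
    return std::nullopt;                 // the AND can be erased entirely
  auto legal = [&](uint32_t M) {
    return (Shrunk & M) == Shrunk && (~Expanded & M) == 0;
  };
  if (legal(0xFFu))   return 0xFFu;      // uxtb
  if (legal(0xFFFFu)) return 0xFFFFu;    // uxth
  if (Shrunk < 256)   return Shrunk;     // Thumb1 movs+ands / ARM immediate
  if ((int32_t)Expanded <= -2 && (int32_t)Expanded >= -256)
    return Expanded;                     // Thumb1 movs+bics
  return std::nullopt;
}

int main() {
  // Bit 8 of the mask is not demanded, so 0x1FF narrows to the uxtb mask.
  assert(pickMask(0x1FF, 0xFFFF00FF) == 0xFFu);
  // All bits demanded: cannot widen to 0xFF/0xFFFF, but it is a small immediate.
  assert(pickMask(0xF0, ~0u) == 0xF0u);
  std::puts("ok");
  return 0;
}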
@@ -14412,16 +14597,18 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
: AtomicExpansionKind::None;
}
-bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
- AtomicCmpXchgInst *AI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- bool hasAtomicCmpXchg =
+ bool HasAtomicCmpXchg =
!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
- return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg;
+ if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+ return AtomicExpansionKind::LLSC;
+ return AtomicExpansionKind::None;
}
bool ARMTargetLowering::shouldInsertFencesForAtomic(
@@ -14548,6 +14735,11 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Addr});
}
+
+bool ARMTargetLowering::alignLoopsWithOptSize() const {
+ return Subtarget->isMClass();
+}
+
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
unsigned
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 50b4c2977fb5..7a9fc739fc13 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -85,6 +85,7 @@ class VectorType;
FMSTAT, // ARM fmstat instruction.
CMOV, // ARM conditional move instructions.
+ SUBS, // Flag-setting subtraction.
SSAT, // Signed saturation
USAT, // Unsigned saturation
@@ -389,6 +390,9 @@ class VectorType;
const SelectionDAG &DAG,
unsigned Depth) const override;
+ bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ TargetLoweringOpt &TLO) const override;
+
bool ExpandInlineAsm(CallInst *CI) const override;
@@ -535,7 +539,8 @@ class VectorType;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
- bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
@@ -572,6 +577,8 @@ class VectorType;
bool isLegalInterleavedAccessType(VectorType *VecTy,
const DataLayout &DL) const;
+ bool alignLoopsWithOptSize() const override;
+
/// Returns the number of interleaved accesses that will be generated when
/// lowering accesses of the given type.
unsigned getNumInterleavedAccesses(VectorType *VecTy,
@@ -583,6 +590,11 @@ class VectorType;
unsigned getABIAlignmentForCallingConv(Type *ArgTy,
DataLayout DL) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
+ bool shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const override;
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -685,6 +697,9 @@ class VectorType;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
+
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
@@ -764,6 +779,8 @@ class VectorType;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ bool shouldConsiderGEPOffsetSplit() const override { return true; }
+
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue CCR, SDValue Cmp,
SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 1d3b1414f090..0df48ba61299 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -2580,6 +2580,37 @@ class N3VLaneCP8<bit op23, bits<2> op21_20, bit op6, bit op4,
let Inst{3-0} = Vm{3-0};
}
+// In Armv8.2-A, some NEON instructions are added that encode Vn and Vm
+// differently:
+// if Q == ‘1’ then UInt(N:Vn) else UInt(Vn:N);
+// if Q == ‘1’ then UInt(M:Vm) else UInt(Vm:M);
+// Class N3VCP8 above describes the Q=1 case, and this class the Q=0 case.
+class N3VCP8Q0<bits<2> op24_23, bits<2> op21_20, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N3RegCplxFrm, itin, opc, dt, asm, cstr, pattern> {
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let DecoderNamespace = "VFPV8";
+ // These have the same encodings in ARM and Thumb2
+ let PostEncoderMethod = "";
+
+ let Inst{31-25} = 0b1111110;
+ let Inst{24-23} = op24_23;
+ let Inst{22} = Vd{4};
+ let Inst{21-20} = op21_20;
+ let Inst{19-16} = Vn{4-1};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{11-8} = 0b1000;
+ let Inst{7} = Vn{0};
+ let Inst{6} = op6;
+ let Inst{5} = Vm{0};
+ let Inst{4} = op4;
+ let Inst{3-0} = Vm{4-1};
+}
+
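
The comment above quotes the Arm ARM pseudocode for these Armv8.2-A encodings: the fifth register-number bit sits above the 4-bit field for the Q=1 form and below it for the Q=0 form, which is why N3VCP8Q0 places Vn{4-1} in the register field and Vn{0} in the N position. A minimal standalone sketch of that packing, using hypothetical helper names rather than anything from the TableGen backend:

    #include <cstdio>

    // Reassemble a 5-bit register number from the 4-bit Vn field and the
    // single N bit, following the pseudocode quoted above: N:Vn for the Q=1
    // (128-bit) form, Vn:N for the Q=0 (64-bit) form that N3VCP8Q0 encodes.
    static unsigned decodeRegNum(unsigned Vn4, unsigned N, bool Q) {
      return Q ? (N << 4) | Vn4    // UInt(N:Vn)
               : (Vn4 << 1) | N;   // UInt(Vn:N)
    }

    int main() {
      // The same encoded fields name different registers in the two forms.
      std::printf("Q=1: %u\n", decodeRegNum(/*Vn4=*/0b0011, /*N=*/1, true));   // 19
      std::printf("Q=0: %u\n", decodeRegNum(/*Vn4=*/0b0011, /*N=*/1, false));  // 7
      return 0;
    }
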
// Operand types for complex instructions
class ComplexRotationOperand<int Angle, int Remainder, string Type, string Diag>
: AsmOperandClass {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 397c9dadb4ac..bcc31f5fa4cc 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -132,34 +132,6 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
+ .cloneMemRefs(*MI)
.add(predOps(ARMCC::AL));
}
-
-std::pair<unsigned, unsigned>
-ARMInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
- const unsigned Mask = ARMII::MO_OPTION_MASK;
- return std::make_pair(TF & Mask, TF & ~Mask);
-}
-
-ArrayRef<std::pair<unsigned, const char *>>
-ARMInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
- using namespace ARMII;
-
- static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
- return makeArrayRef(TargetFlags);
-}
-
-ArrayRef<std::pair<unsigned, const char *>>
-ARMInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
- using namespace ARMII;
-
- static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_GOT, "arm-got"},
- {MO_SBREL, "arm-sbrel"},
- {MO_DLLIMPORT, "arm-dllimport"},
- {MO_SECREL, "arm-secrel"},
- {MO_NONLAZY, "arm-nonlazy"}};
- return makeArrayRef(TargetFlags);
-}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
index c54c987134df..c87fb97448c9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -38,13 +38,6 @@ public:
///
const ARMRegisterInfo &getRegisterInfo() const override { return RI; }
- std::pair<unsigned, unsigned>
- decomposeMachineOperandsTargetFlags(unsigned TF) const override;
- ArrayRef<std::pair<unsigned, const char *>>
- getSerializableDirectMachineOperandTargetFlags() const override;
- ArrayRef<std::pair<unsigned, const char *>>
- getSerializableBitmaskMachineOperandTargetFlags() const override;
-
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
};
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index d4c342cee5c0..13abdc9687ec 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -144,6 +144,7 @@ def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
+def ARMsubs : SDNode<"ARMISD::SUBS", SDTIntBinOp, [SDNPOutGlue]>;
def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
@@ -221,6 +222,7 @@ def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
def HasV5T : Predicate<"Subtarget->hasV5TOps()">,
AssemblerPredicate<"HasV5TOps", "armv5t">;
+def NoV5T : Predicate<"!Subtarget->hasV5TOps()">;
def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
AssemblerPredicate<"HasV5TEOps", "armv5te">;
def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
@@ -255,6 +257,8 @@ def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
+ AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -285,6 +289,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16","full half-float">;
+def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
+ AssemblerPredicate<"FeatureFP16FML","full half-float fml">;
def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
@@ -351,23 +357,24 @@ def UseNegativeImmediates :
let RecomputePerFunction = 1 in {
def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
- def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
- def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+ def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
+ def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+
+ def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
+ " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
+ "MF->getFunction().optForMinSize())">;
}
-def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
-// But only select them if more precision in FP computation is allowed.
+// But only select them if more precision in FP computation is allowed, and when
+// they are not slower than a mul + add sequence.
// Do not use them for Darwin platforms.
def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
" FPOpFusion::Fast && "
" Subtarget->hasVFP4()) && "
- "!Subtarget->isTargetDarwin()">;
-def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion =="
- " FPOpFusion::Fast &&"
- " Subtarget->hasVFP4()) || "
- "Subtarget->isTargetDarwin()">;
+ "!Subtarget->isTargetDarwin() &&"
+ "Subtarget->useFPVMLx()">;
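
The DontUseFusedMAC predicate goes away because UseFPVMLx itself now excludes fast FP contraction (unless the function is optimised for minimum size), so the VMLA/VMLS patterns later in the diff no longer need a separate guard. Roughly, these predicates arbitrate how a source-level multiply-add is selected; a small illustration only, since the final choice also depends on the FPU features and the scheduling model:

    // With VFP4 and -ffp-contract=fast this expression is a candidate for a
    // fused vfma.f32 (UseFusedMAC); otherwise UseFPVMLx decides between a
    // vmla.f32 multiply-accumulate and a separate vmul.f32 + vadd.f32 pair,
    // now also allowing vmla when optimising for minimum size.
    float multiply_add(float a, float b, float c) {
      return a * b + c;
    }
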
def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
@@ -387,6 +394,10 @@ let RecomputePerFunction = 1 in {
def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
+// Armv8.5-A extensions
+def HasSB : Predicate<"Subtarget->hasSB()">,
+ AssemblerPredicate<"FeatureSB", "sb">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -415,24 +426,22 @@ def imm16_31 : ImmLeaf<i32, [{
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
- if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17)
- return true;
-
- if (N->getOpcode() != ISD::SRA)
- return false;
- if (N->getOperand(0).getOpcode() != ISD::SHL)
- return false;
-
- auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!ShiftVal || ShiftVal->getZExtValue() != 16)
- return false;
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
- ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1));
- if (!ShiftVal || ShiftVal->getZExtValue() != 16)
- return false;
+def sext_bottom_16 : PatFrag<(ops node:$a),
+ (sext_inreg node:$a, i16)>;
+def sext_top_16 : PatFrag<(ops node:$a),
+ (i32 (sra node:$a, (i32 16)))>;
- return true;
-}]>;
+def bb_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sext_bottom_16 node:$a), (sext_bottom_16 node:$b))>;
+def bt_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sext_bottom_16 node:$a), (sra node:$b, (i32 16)))>;
+def tb_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sra node:$a, (i32 16)), (sext_bottom_16 node:$b))>;
+def tt_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sra node:$a, (i32 16)), (sra node:$b, (i32 16)))>;
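
sext_16_node now only asks whether the value already has at least 17 sign bits, i.e. it is a sign-extension of some 16-bit quantity, and the four *_mul fragments spell out the half-word combinations taken by the SMULxy/SMLAxy instructions below. A reference sketch of that arithmetic in plain C++ (assuming arithmetic right shift on signed int, which C++20 guarantees):

    #include <cstdint>

    // 'b' selects the sign-extended bottom half of an operand (sext_bottom_16),
    // 't' selects the arithmetically shifted top half (sext_top_16).
    static int32_t bottom16(int32_t x) { return (int16_t)x; }
    static int32_t top16(int32_t x)    { return x >> 16; }

    // Reference semantics of the bb/bt/tb/tt products (SMULBB/BT/TB/TT).
    int32_t smulbb(int32_t a, int32_t b) { return bottom16(a) * bottom16(b); }
    int32_t smulbt(int32_t a, int32_t b) { return bottom16(a) * top16(b); }
    int32_t smultb(int32_t a, int32_t b) { return top16(a)    * bottom16(b); }
    int32_t smultt(int32_t a, int32_t b) { return top16(a)    * top16(b); }
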
/// Split a 32-bit immediate into two 16 bit parts.
def hi16 : SDNodeXForm<imm, [{
@@ -713,7 +722,20 @@ def arm_i32imm : PatLeaf<(imm), [{
if (Subtarget->useMovt(*MF))
return true;
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
-}]>;
+}]> {
+ // Ideally this would be an IntImmLeaf, but then we wouldn't have access to
+ // the MachineFunction.
+ let GISelPredicateCode = [{
+ const auto &MF = *MI.getParent()->getParent();
+ if (STI.useMovt(MF))
+ return true;
+
+ const auto &MO = MI.getOperand(1);
+ if (!MO.isCImm())
+ return false;
+ return ARM_AM::isSOImmTwoPartVal(MO.getCImm()->getZExtValue());
+ }];
+}
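
The GISelPredicateCode mirrors the SelectionDAG predicate for GlobalISel: a 32-bit constant is acceptable when MOVW/MOVT is available, or when it can be built from two ARM modified immediates (ARM_AM::isSOImmTwoPartVal). A simplified standalone sketch of the "two-part" idea, with hypothetical helper names; this is an illustration, not the ARM_AM implementation:

    #include <cstdint>

    static uint32_t rotl32(uint32_t V, unsigned N) {
      N &= 31;
      return N ? (V << N) | (V >> (32 - N)) : V;
    }
    static uint32_t rotr32(uint32_t V, unsigned N) {
      N &= 31;
      return N ? (V >> N) | (V << (32 - N)) : V;
    }

    // An ARM modified immediate is an 8-bit value rotated right by an even
    // amount: undoing some even rotation must leave only the low 8 bits set.
    static bool isModifiedImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2)
        if ((rotl32(V, Rot) & ~0xFFu) == 0)
          return true;
      return false;
    }

    // "Two-part" values can be materialised with two data-processing
    // instructions, e.g. MOV of one modified immediate then ORR of another.
    // Brute force: peel off one rotated 8-bit window and test the remainder.
    static bool isTwoPartImm(uint32_t V) {
      if (isModifiedImm(V))
        return false;  // already a single-instruction immediate
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        uint32_t First = rotr32(rotl32(V, Rot) & 0xFFu, Rot);
        uint32_t Rest = V & ~First;
        if (First != 0 && isModifiedImm(Rest))
          return true;
      }
      return false;
    }
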
/// imm0_1 predicate - Immediate in the range [0,1].
def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; }
@@ -2191,6 +2213,9 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
let Inst = 0xe7ffdefe;
}
+def : Pat<(debugtrap), (BKPT 0)>, Requires<[IsARM, HasV5T]>;
+def : Pat<(debugtrap), (UDF 254)>, Requires<[IsARM, NoV5T]>;
+
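
Together with the Thumb patterns later in this diff, these give llvm.debugtrap a lowering everywhere: BKPT #0 where the instruction exists (ARMv5T and later) and the permanently undefined UDF #254 on older cores, which is what the new NoV5T predicate above is for. A one-line usage sketch, assuming Clang's __builtin_debugtrap:

    // __builtin_debugtrap() emits the llvm.debugtrap intrinsic, which these
    // patterns select as "bkpt #0" on ARMv5T+ and as "udf #254" otherwise.
    void stop_in_debugger() { __builtin_debugtrap(); }
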
// Address computation and loads and stores in PIC mode.
let isNotDuplicable = 1 in {
def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
@@ -3321,7 +3346,7 @@ multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
let hasSideEffects = 0 in {
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in
defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
IIC_iLoad_mu>, ComplexDeprecationPredicate<"ARMLoad">;
@@ -3519,10 +3544,14 @@ def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot),
def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
def : ARMV6Pat<(int_arm_sxtb16 GPR:$Src),
(SXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(int_arm_sxtb16 (rotr GPR:$Src, rot_imm:$rot)),
+ (SXTB16 GPR:$Src, rot_imm:$rot)>;
def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, GPR:$RHS),
(SXTAB16 GPR:$LHS, GPR:$RHS, 0)>;
+def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)),
+ (SXTAB16 GPR:$LHS, GPR:$RHS, rot_imm:$rot)>;
// Zero extenders
@@ -3544,6 +3573,8 @@ def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16 GPR:$Src, 1)>;
def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src),
(UXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(int_arm_uxtb16 (rotr GPR:$Src, rot_imm:$rot)),
+ (UXTB16 GPR:$Src, rot_imm:$rot)>;
def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -3560,6 +3591,8 @@ def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS),
(UXTAB16 GPR:$LHS, GPR:$RHS, 0)>;
+def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)),
+ (UXTAB16 GPR:$LHS, GPR:$RHS, rot_imm:$rot)>;
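
The patterns added in this hunk fold an explicit rotation of the source into the rotate field these byte-extract instructions already carry, so an sxtb16/uxtb16 intrinsic applied to (rotr x, N) becomes a single instruction with ROR #N; the Thumb2 equivalents appear later in the diff. A reference sketch of what the rotated forms compute, in plain C++ with hypothetical helper names rather than the ACLE intrinsics; the hardware restricts the rotation to 0, 8, 16 or 24:

    #include <cstdint>

    static uint32_t ror32(uint32_t X, unsigned N) {
      N &= 31;
      return N ? (X >> N) | (X << (32 - N)) : X;
    }

    // UXTB16 Rd, Rm, ROR #N: rotate, then zero-extend bytes 0 and 2 into the
    // two 16-bit halves of the result.
    static uint32_t uxtb16(uint32_t Rm, unsigned Rot) {
      return ror32(Rm, Rot) & 0x00FF00FFu;
    }

    // SXTB16 is the signed analogue: sign-extend bytes 0 and 2 to 16 bits each.
    static uint32_t sxtb16(uint32_t Rm, unsigned Rot) {
      uint32_t R = ror32(Rm, Rot);
      uint32_t Lo = uint32_t(int32_t(int8_t(R & 0xFF))) & 0xFFFFu;
      uint32_t Hi = uint32_t(int32_t(int8_t((R >> 16) & 0xFF))) & 0xFFFFu;
      return (Hi << 16) | Lo;
    }
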
def SBFX : I<(outs GPRnopc:$Rd),
@@ -3620,6 +3653,14 @@ let isAdd = 1 in
defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMaddc, 1>;
defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>;
+def : ARMPat<(ARMsubs GPR:$Rn, mod_imm:$imm), (SUBSri $Rn, mod_imm:$imm)>;
+def : ARMPat<(ARMsubs GPR:$Rn, GPR:$Rm), (SUBSrr $Rn, $Rm)>;
+def : ARMPat<(ARMsubs GPR:$Rn, so_reg_imm:$shift),
+ (SUBSrsi $Rn, so_reg_imm:$shift)>;
+def : ARMPat<(ARMsubs GPR:$Rn, so_reg_reg:$shift),
+ (SUBSrsr $Rn, so_reg_reg:$shift)>;
+
+
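
The ARMsubs node defined earlier in this file produces glue, i.e. it stands for a subtraction whose flags are also consumed, and these patterns map it onto the flag-setting SUBS forms for each operand kind; the Thumb1 and Thumb2 patterns follow later in the diff. A rough source-level illustration of a case where one flag-setting subtract can serve both uses:

    #include <cstdint>

    // The difference and the comparison share operands, so a single SUBS can
    // produce the returned value and set the flags the conditional needs,
    // instead of a SUB followed by a separate CMP.
    int32_t clamped_diff(int32_t a, int32_t b) {
      int32_t d = a - b;
      return d < 0 ? 0 : d;
    }
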
let isAdd = 1 in
defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>;
@@ -4211,29 +4252,25 @@ def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
multiclass AI_smul<string opc> {
def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
- (sext_inreg GPR:$Rm, i16)))]>,
+ [(set GPR:$Rd, (bb_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
- (sra GPR:$Rm, (i32 16))))]>,
+ [(set GPR:$Rd, (bt_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
- (sext_inreg GPR:$Rm, i16)))]>,
+ [(set GPR:$Rd, (tb_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
- (sra GPR:$Rm, (i32 16))))]>,
+ [(set GPR:$Rd, (tt_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
@@ -4257,35 +4294,31 @@ multiclass AI_smla<string opc> {
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd, (add GPR:$Ra,
- (mul (sext_inreg GPRnopc:$Rn, i16),
- (sext_inreg GPRnopc:$Rm, i16))))]>,
+ (bb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16),
- (sra GPRnopc:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (bt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
- (sext_inreg GPRnopc:$Rm, i16))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (tb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
- (sra GPRnopc:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (tt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
@@ -4863,6 +4896,14 @@ def TSB : AInoP<(outs), (ins tsb_opt:$opt), MiscFrm, NoItinerary,
}
+// Armv8.5-A speculation barrier
+def SB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "sb", "", []>,
+ Requires<[IsARM, HasSB]>, Sched<[]> {
+ let Inst{31-0} = 0xf57ff070;
+ let Unpredictable = 0x000fff0f;
+ let hasSideEffects = 1;
+}
+
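
SB is the Armv8.5-A speculation barrier, a fixed-encoding instruction with no operands (its Thumb2 form t2SB appears later in the diff). A usage sketch via inline assembly, assuming a toolchain whose assembler knows the mnemonic (for example when targeting armv8.5-a or enabling the sb extension):

    // Issue a speculation barrier; the "memory" clobber also stops the
    // compiler from reordering memory accesses across the call site.
    static inline void speculation_barrier(void) {
      __asm__ __volatile__("sb" ::: "memory");
    }
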
let usesCustomInserter = 1, Defs = [CPSR] in {
// Pseudo instruction that combines movs + predicated rsbmi
@@ -4870,7 +4911,7 @@ let usesCustomInserter = 1, Defs = [CPSR] in {
def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
}
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, Defs = [CPSR] in {
def COPY_STRUCT_BYVAL_I32 : PseudoInst<
(outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
NoItinerary,
@@ -5778,26 +5819,21 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
// smul* and smla*
def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
- (SMULBB GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
- (SMULTB GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul sext_16_node:$a, sext_16_node:$b)),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sext_bottom_16 GPR:$b)),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sext_top_16 GPR:$b)),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sext_top_16 GPR:$a), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_bottom_16 GPR:$b))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_top_16 GPR:$b))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul (sext_top_16 GPR:$a), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b),
(SMULBB GPR:$a, GPR:$b)>;
@@ -5902,6 +5938,8 @@ include "ARMInstrNEON.td"
// Memory barriers
def : InstAlias<"dmb", (DMB 0xf), 0>, Requires<[IsARM, HasDB]>;
def : InstAlias<"dsb", (DSB 0xf), 0>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"ssbb", (DSB 0x0), 1>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"pssbb", (DSB 0x4), 1>, Requires<[IsARM, HasDB]>;
def : InstAlias<"isb", (ISB 0xf), 0>, Requires<[IsARM, HasDB]>;
// Armv8-R 'Data Full Barrier'
def : InstAlias<"dfb", (DSB 0xc), 1>, Requires<[IsARM, HasDFB]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 4525eec8da03..96986e74415b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -4305,17 +4305,29 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(v2f32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-
+def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
+ (v8f16 (NEONvduplane (v8f16 QPR:$src2), imm:$lane)))),
+ (v8f16 (VMULslhq(v8f16 QPR:$src1),
+ (v4f16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
(VMULslfd DPR:$Rn,
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
(i32 0))>;
+def : Pat<(v4f16 (fmul DPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
+ (VMULslhd DPR:$Rn,
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
+ (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
(VMULslfq QPR:$Rn,
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
(i32 0))>;
-
+def : Pat<(v8f16 (fmul QPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
+ (VMULslhq QPR:$Rn,
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
+ (i32 0))>;
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
@@ -4390,16 +4402,16 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
v4f16, fmul_su, fadd_mlx>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
v8f16, fmul_su, fadd_mlx>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4620,16 +4632,16 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
v4f16, fmul, fsub>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
v8f16, fmul, fsub>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4734,6 +4746,12 @@ def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
+def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
+ (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON,HasFullFP16]>;
+def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
+ (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
@@ -5066,7 +5084,7 @@ def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
"f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
Requires<[HasNEON, HasFullFP16]>;
def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
- "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>,
+ "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
@@ -5091,6 +5109,54 @@ def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
(VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}
+// +fp16fml Floating Point Multiplication Variants
+let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {
+
+class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
+ RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
+ : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
+ asm, "f16", "$Vd, $Vn, $Vm", "", []>;
+
+class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
+ RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
+ : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
+ asm, "f16", "$Vd, $Vn, $Vm", "", []>;
+
+class VFMQ0<string opc, bits<2> S>
+ : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
+ (ins SPR:$Vn, SPR:$Vm, VectorIndex32:$idx),
+ IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
+ bit idx;
+ let Inst{3} = idx;
+ let Inst{19-16} = Vn{4-1};
+ let Inst{7} = Vn{0};
+ let Inst{5} = Vm{0};
+ let Inst{2-0} = Vm{3-1};
+}
+
+class VFMQ1<string opc, bits<2> S>
+ : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm, VectorIndex16:$idx),
+ IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
+ bits<2> idx;
+ let Inst{5} = idx{1};
+ let Inst{3} = idx{0};
+}
+
+let hasNoSchedulingInfo = 1 in {
+// op1 op2 op3
+def VFMALD : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
+def VFMSLD : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
+def VFMALQ : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
+def VFMSLQ : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
+def VFMALDI : VFMQ0<"vfmal", 0b00>;
+def VFMSLDI : VFMQ0<"vfmsl", 0b01>;
+def VFMALQI : VFMQ1<"vfmal", 0b00>;
+def VFMSLQI : VFMQ1<"vfmsl", 0b01>;
+}
+} // HasNEON, HasFP16FML
+
+
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
(VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
@@ -5455,17 +5521,17 @@ defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
"vmax", "u", umax, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmax", "f32",
- v2f32, v2f32, fmaxnan, 1>;
+ v2f32, v2f32, fmaximum, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f32",
- v4f32, v4f32, fmaxnan, 1>;
+ v4f32, v4f32, fmaximum, 1>;
def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmax", "f16",
- v4f16, v4f16, fmaxnan, 1>,
+ v4f16, v4f16, fmaximum, 1>,
Requires<[HasNEON, HasFullFP16]>;
def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f16",
- v8f16, v8f16, fmaxnan, 1>,
+ v8f16, v8f16, fmaximum, 1>,
Requires<[HasNEON, HasFullFP16]>;
// VMAXNM
@@ -5497,17 +5563,17 @@ defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
"vmin", "u", umin, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmin", "f32",
- v2f32, v2f32, fminnan, 1>;
+ v2f32, v2f32, fminimum, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f32",
- v4f32, v4f32, fminnan, 1>;
+ v4f32, v4f32, fminimum, 1>;
def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmin", "f16",
- v4f16, v4f16, fminnan, 1>,
+ v4f16, v4f16, fminimum, 1>,
Requires<[HasNEON, HasFullFP16]>;
def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f16",
- v8f16, v8f16, fminnan, 1>,
+ v8f16, v8f16, fminimum, 1>,
Requires<[HasNEON, HasFullFP16]>;
// VMINNM
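
The change from fmaxnan/fminnan to fmaximum/fminimum follows the upstream rename of the underlying ISD nodes; the semantics stay the NaN-propagating IEEE 754-2018 maximum/minimum, which is what the NEON VMAX/VMIN encodings implement, as opposed to VMAXNM/VMINNM, which return the number when exactly one operand is NaN. A reference sketch of the difference, ignoring signed-zero details:

    #include <cmath>
    #include <limits>

    // NaN-propagating maximum, as modelled by fmaximum and implemented by the
    // NEON VMAX encodings: any NaN input yields NaN.
    static float maximum(float A, float B) {
      if (std::isnan(A) || std::isnan(B))
        return std::numeric_limits<float>::quiet_NaN();
      return A > B ? A : B;
    }

    // maxNum semantics, as implemented by VMAXNM and by std::fmax: a single
    // NaN operand is ignored and the numeric operand is returned.
    static float max_num(float A, float B) { return std::fmax(A, B); }
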
@@ -6318,6 +6384,9 @@ def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
let Inst{19} = lane{0};
}
+def : Pat<(v4f16 (NEONvduplane (v4f16 DPR:$Vm), imm:$lane)),
+ (VDUPLN32d DPR:$Vm, imm:$lane)>;
+
def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
(VDUPLN32d DPR:$Vm, imm:$lane)>;
@@ -6332,6 +6401,10 @@ def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
(v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v8f16 (NEONvduplane (v8f16 QPR:$src), imm:$lane)),
+ (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
(v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
@@ -6341,12 +6414,18 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f16 (NEONvdup HPR:$src)),
+ (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
+ HPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
(v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
(v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$src, ssub_0), (i32 0)))>;
+def : Pat<(v8f16 (NEONvdup HPR:$src)),
+ (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
+ HPR:$src, ssub_0), (i32 0)))>;
// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
@@ -6558,6 +6637,8 @@ def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+def : Pat<(v8f16 (NEONvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
+def : Pat<(v4f16 (NEONvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
// VREV32 : Vector Reverse elements within 32-bit words
@@ -6647,13 +6728,14 @@ def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{10-9} = index{1-0};
let Inst{8} = 0b0;
}
+def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
+ (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{10} = index{0};
let Inst{9-8} = 0b00;
}
-def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
- (v2f32 DPR:$Vm),
- (i32 imm:$index))),
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
@@ -6663,6 +6745,9 @@ def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
+def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
+ (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
@@ -6671,9 +6756,7 @@ def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
let Inst{11} = index{0};
let Inst{10-8} = 0b000;
}
-def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
- (v4f32 QPR:$Vm),
- (i32 imm:$index))),
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
(VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
// VTRN : Vector Transpose
@@ -7001,19 +7084,19 @@ def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
-def : N3VSPatFP16<fmaxnan, VMAXhd>, Requires<[HasFullFP16]>;
-def : N3VSPatFP16<fminnan, VMINhd>, Requires<[HasFullFP16]>;
-def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
-def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
+def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
+def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
+def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
+def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index 88aab47a79bf..b20b34eaa6a9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -781,7 +781,7 @@ defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
// These require base address to be written back or one of the loaded regs.
let hasSideEffects = 0 in {
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in
def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> {
bits<3> Rn;
@@ -826,7 +826,8 @@ def : InstAlias<"ldm${p} $Rn!, $regs",
(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs), 0>,
Requires<[IsThumb, IsThumb1Only]>;
-let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1,
+ variadicOpsAreDefs = 1 in
def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iPop,
"pop${p}\t$regs", []>,
@@ -1343,8 +1344,20 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
tGPR:$Rm))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
+
+ def tRSBS : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rd, CPSR, (ARMsubc 0, tGPR:$Rn))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
}
+
+def : T1Pat<(ARMsubs tGPR:$Rn, tGPR:$Rm), (tSUBSrr $Rn, $Rm)>;
+def : T1Pat<(ARMsubs tGPR:$Rn, imm0_7:$imm3), (tSUBSi3 $Rn, imm0_7:$imm3)>;
+def : T1Pat<(ARMsubs tGPR:$Rn, imm0_255:$imm8), (tSUBSi8 $Rn, imm0_255:$imm8)>;
+
+
// Sign-extend byte
def tSXTB : // A8.6.222
T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1380,6 +1393,9 @@ def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8",
let Inst{7-0} = imm8;
}
+def : Pat<(debugtrap), (tBKPT 0)>, Requires<[IsThumb, HasV5T]>;
+def : Pat<(debugtrap), (tUDF 254)>, Requires<[IsThumb, NoV5T]>;
+
def t__brkdiv0 : TI<(outs), (ins), IIC_Br, "__brkdiv0",
[(int_arm_undefined 249)]>, Encoding16,
Requires<[IsThumb, IsWindows]> {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index f67075fbf9fd..7a6673b49d57 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1775,7 +1775,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
let hasSideEffects = 0 in {
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in
defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
multiclass thumb2_st_mult<string asm, InstrItinClass itin,
@@ -1997,6 +1997,10 @@ def : Thumb2DSPPat<(int_arm_sxtb16 rGPR:$Rn),
(t2SXTB16 rGPR:$Rn, 0)>;
def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, rGPR:$Rm),
(t2SXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>;
+def : Thumb2DSPPat<(int_arm_sxtb16 (rotr rGPR:$Rn, rot_imm:$rot)),
+ (t2SXTB16 rGPR:$Rn, rot_imm:$rot)>;
+def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, (rotr rGPR:$Rm, rot_imm:$rot)),
+ (t2SXTAB16 rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
// A simple right-shift can also be used in most cases (the exception is the
@@ -2032,6 +2036,8 @@ def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF),
def : Thumb2DSPPat<(int_arm_uxtb16 rGPR:$Rm),
(t2UXTB16 rGPR:$Rm, 0)>;
+def : Thumb2DSPPat<(int_arm_uxtb16 (rotr rGPR:$Rn, rot_imm:$rot)),
+ (t2UXTB16 rGPR:$Rn, rot_imm:$rot)>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
// The transformation should probably be done as a combiner action
@@ -2062,6 +2068,8 @@ def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
(t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, rGPR:$Rm),
(t2UXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>;
+def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, (rotr rGPR:$Rm, rot_imm:$rot)),
+ (t2UXTAB16 rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
}
@@ -2086,6 +2094,12 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>;
defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMaddc, 1>;
defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMsubc>;
+def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_imm:$imm),
+ (t2SUBSri $Rn, t2_so_imm:$imm)>;
+def : T2Pat<(ARMsubs GPRnopc:$Rn, rGPR:$Rm), (t2SUBSrr $Rn, $Rm)>;
+def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
+ (t2SUBSrs $Rn, t2_so_reg:$ShiftedRm)>;
+
let hasPostISelHook = 1 in {
defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>;
@@ -2718,28 +2732,25 @@ class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc,
}
def t2SMULBB : T2ThreeRegSMUL<0b001, 0b00, "smulbb",
- [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16)))]>;
+ [(set rGPR:$Rd, (bb_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULBT : T2ThreeRegSMUL<0b001, 0b01, "smulbt",
- [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16))))]>;
+ [(set rGPR:$Rd, (bt_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb",
- [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16)))]>;
+ [(set rGPR:$Rd, (tb_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
- [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16))))]>;
+ [(set rGPR:$Rd, (tt_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb",
[(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt",
[(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>;
-def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
- (t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
-def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))),
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_bottom_16 rGPR:$Rm)),
+ (t2SMULBB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_top_16 rGPR:$Rm)),
(t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
-def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm),
+def : Thumb2DSPPat<(mul (sext_top_16 rGPR:$Rn), sext_16_node:$Rm),
(t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+
def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm),
(t2SMULBB rGPR:$Rn, rGPR:$Rm)>;
def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm),
@@ -2767,18 +2778,13 @@ class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
}
def t2SMLABB : T2FourRegSMLA<0b001, 0b00, "smlabb",
- [(set rGPR:$Rd, (add rGPR:$Ra,
- (mul (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (bb_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLABT : T2FourRegSMLA<0b001, 0b01, "smlabt",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16)))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (bt_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (tb_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16)))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (tt_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb",
[(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
@@ -2786,11 +2792,14 @@ def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
(t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-def : Thumb2DSPMulPat<(add rGPR:$Ra,
- (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))),
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn,
+ (sext_bottom_16 rGPR:$Rm))),
+ (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn,
+ (sext_top_16 rGPR:$Rm))),
(t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-def : Thumb2DSPMulPat<(add rGPR:$Ra,
- (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul (sext_top_16 rGPR:$Rn),
+ sext_16_node:$Rm)),
(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc),
@@ -3223,6 +3232,14 @@ def t2TSB : T2I<(outs), (ins tsb_opt:$opt), NoItinerary,
}
}
+// Armv8.5-A speculation barrier
+def t2SB : Thumb2XI<(outs), (ins), AddrModeNone, 4, NoItinerary, "sb", "", []>,
+ Requires<[IsThumb2, HasSB]>, Sched<[]> {
+ let Inst{31-0} = 0xf3bf8f70;
+ let Unpredictable = 0x000f2f0f;
+ let hasSideEffects = 1;
+}
+
class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
@@ -4429,13 +4446,13 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
(t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
-let AddedComplexity = 8 in {
- def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
- def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
- def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
- def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
- def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
- def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
+let AddedComplexity = 8, Predicates = [IsThumb, HasAcquireRelease, HasV7Clrex] in {
+ def : Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
+ def : Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
+ def : Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
+ def : Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
+ def : Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
+ def : Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
}
@@ -4538,6 +4555,12 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+
+// Non-predicable aliases of a predicable DSB: the predicate is (14, 0) where
+// 14 = AL (always execute) and 0 = "instruction doesn't read the CPSR".
+def : InstAlias<"ssbb", (t2DSB 0x0, 14, 0), 1>, Requires<[HasDB, IsThumb2]>;
+def : InstAlias<"pssbb", (t2DSB 0x4, 14, 0), 1>, Requires<[HasDB, IsThumb2]>;
+
// Armv8-R 'Data Full Barrier'
def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index 2f14b78c91fd..b58730c452f7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -725,9 +725,11 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
}
def : FullFP16Pat<(f64 (fpextend HPR:$Sm)),
- (VCVTBHD (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+ (VCVTBHD (COPY_TO_REGCLASS HPR:$Sm, SPR))>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
- (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+ (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
@@ -746,9 +748,11 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
}
def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
- (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>;
+ (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
- (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
+ (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -1810,7 +1814,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
@@ -1819,7 +1823,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1832,17 +1836,17 @@ def VMLAH : AHbI<0b11100, 0b00, 0, 0,
[(set HPR:$Sd, (fadd_mlx (fmul_su HPR:$Sn, HPR:$Sm),
HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
def : Pat<(fadd_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
(VMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP, UseFPVMLx]>;
def VMLSD : ADbI<0b11100, 0b00, 1, 0,
@@ -1851,7 +1855,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
@@ -1860,7 +1864,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1873,17 +1877,17 @@ def VMLSH : AHbI<0b11100, 0b00, 1, 0,
[(set HPR:$Sd, (fadd_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
(VMLSH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1891,7 +1895,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
@@ -1900,7 +1904,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1913,29 +1917,29 @@ def VNMLAH : AHbI<0b11100, 0b01, 1, 0,
[(set HPR:$Sd, (fsub_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
// (-(a * b) - dst) -> -(dst + (a * b))
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg (fmul_su HPR:$a, HPR:$b)), HPR:$dstin),
(VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
// (-dst - (a * b)) -> -(dst + (a * b))
def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg HPR:$dstin), (fmul_su HPR:$a, HPR:$b)),
(VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1943,7 +1947,7 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
@@ -1951,7 +1955,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1963,17 +1967,17 @@ def VNMLSH : AHbI<0b11100, 0b01, 0, 0,
IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm",
[(set HPR:$Sd, (fsub_mlx (fmul_su HPR:$Sn, HPR:$Sm), HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fmul_su HPR:$a, HPR:$b), HPR:$dstin),
(VNMLSH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
//===----------------------------------------------------------------------===//
// Fused FP Multiply-Accumulate Operations.
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 6692a4d41420..293e734c97cd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -76,6 +76,42 @@ private:
const ARMRegisterBankInfo &RBI;
const ARMSubtarget &STI;
+ // Store the opcodes that we might need, so we don't have to check what kind
+ // of subtarget (ARM vs Thumb) we have all the time.
+ struct OpcodeCache {
+ unsigned ZEXT16;
+ unsigned SEXT16;
+
+ unsigned ZEXT8;
+ unsigned SEXT8;
+
+ // Used for implementing ZEXT/SEXT from i1
+ unsigned AND;
+ unsigned RSB;
+
+ unsigned STORE32;
+ unsigned LOAD32;
+
+ unsigned STORE16;
+ unsigned LOAD16;
+
+ unsigned STORE8;
+ unsigned LOAD8;
+
+ OpcodeCache(const ARMSubtarget &STI);
+ } const Opcodes;
+
+ // Select the opcode for simple extensions (that translate to a single SXT/UXT
+ // instruction). Extension operations more complicated than that should not
+ // invoke this. Returns the original opcode if it doesn't know how to select a
+ // better one.
+ unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) const;
+
+ // Select the opcode for simple loads and stores. Returns the original opcode
+ // if it doesn't know how to select a better one.
+ unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
+ unsigned Size) const;
+
#define GET_GLOBALISEL_PREDICATES_DECL
#include "ARMGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
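
OpcodeCache trades a handful of fields for the repeated ARM-vs-Thumb2 opcode checks that used to be spread over the selector; its constructor, later in these hunks, fills every field once from the subtarget. A minimal sketch of the pattern with placeholder opcode values, since the real ones come from the generated ARM instruction tables:

    // Placeholder opcodes; the real values are ARM::LDRi12 / ARM::t2LDRi12 etc.
    enum : unsigned { LDRi12 = 1, t2LDRi12 = 2 };

    struct OpcodeCacheSketch {
      unsigned LOAD32;
      explicit OpcodeCacheSketch(bool IsThumb)
          : LOAD32(IsThumb ? t2LDRi12 : LDRi12) {}  // decided once, up front
    };

    // Selection code then reads Cache.LOAD32 instead of re-testing the
    // subtarget at every load it selects.
    static unsigned selectLoad(const OpcodeCacheSketch &Cache) {
      return Cache.LOAD32;
    }
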
@@ -107,7 +143,7 @@ ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM,
const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI)
: InstructionSelector(), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI),
+ TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI), Opcodes(STI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "ARMGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
@@ -225,41 +261,63 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB,
return true;
}
-/// Select the opcode for simple extensions (that translate to a single SXT/UXT
-/// instruction). Extension operations more complicated than that should not
-/// invoke this. Returns the original opcode if it doesn't know how to select a
-/// better one.
-static unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) {
+ARMInstructionSelector::OpcodeCache::OpcodeCache(const ARMSubtarget &STI) {
+ bool isThumb = STI.isThumb();
+
+ using namespace TargetOpcode;
+
+#define STORE_OPCODE(VAR, OPC) VAR = isThumb ? ARM::t2##OPC : ARM::OPC
+ STORE_OPCODE(SEXT16, SXTH);
+ STORE_OPCODE(ZEXT16, UXTH);
+
+ STORE_OPCODE(SEXT8, SXTB);
+ STORE_OPCODE(ZEXT8, UXTB);
+
+ STORE_OPCODE(AND, ANDri);
+ STORE_OPCODE(RSB, RSBri);
+
+ STORE_OPCODE(STORE32, STRi12);
+ STORE_OPCODE(LOAD32, LDRi12);
+
+ // LDRH/STRH are special...
+ STORE16 = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ LOAD16 = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+
+ STORE_OPCODE(STORE8, STRBi12);
+ STORE_OPCODE(LOAD8, LDRBi12);
+#undef STORE_OPCODE
+}
+
+unsigned ARMInstructionSelector::selectSimpleExtOpc(unsigned Opc,
+ unsigned Size) const {
using namespace TargetOpcode;
if (Size != 8 && Size != 16)
return Opc;
if (Opc == G_SEXT)
- return Size == 8 ? ARM::SXTB : ARM::SXTH;
+ return Size == 8 ? Opcodes.SEXT8 : Opcodes.SEXT16;
if (Opc == G_ZEXT)
- return Size == 8 ? ARM::UXTB : ARM::UXTH;
+ return Size == 8 ? Opcodes.ZEXT8 : Opcodes.ZEXT16;
return Opc;
}
-/// Select the opcode for simple loads and stores. For types smaller than 32
-/// bits, the value will be zero extended. Returns the original opcode if it
-/// doesn't know how to select a better one.
-static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
- unsigned Size) {
+unsigned ARMInstructionSelector::selectLoadStoreOpCode(unsigned Opc,
+ unsigned RegBank,
+ unsigned Size) const {
bool isStore = Opc == TargetOpcode::G_STORE;
if (RegBank == ARM::GPRRegBankID) {
switch (Size) {
case 1:
case 8:
- return isStore ? ARM::STRBi12 : ARM::LDRBi12;
+ return isStore ? Opcodes.STORE8 : Opcodes.LOAD8;
case 16:
- return isStore ? ARM::STRH : ARM::LDRH;
+ return isStore ? Opcodes.STORE16 : Opcodes.LOAD16;
case 32:
- return isStore ? ARM::STRi12 : ARM::LDRi12;
+ return isStore ? Opcodes.STORE32 : Opcodes.LOAD32;
default:
return Opc;
}
@@ -702,7 +760,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
switch (SrcSize) {
case 1: {
// ZExt boils down to & 0x1; for SExt we also subtract that from 0
- I.setDesc(TII.get(ARM::ANDri));
+ I.setDesc(TII.get(Opcodes.AND));
MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());
if (isSExt) {
@@ -714,7 +772,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
auto InsertBefore = std::next(I.getIterator());
auto SubI =
- BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri))
+ BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(Opcodes.RSB))
.addDef(SExtResult)
.addUse(AndResult)
.addImm(0)
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 891418306903..4a0c24d58474 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -75,13 +75,48 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
- getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+ if (ST.isThumb1Only()) {
+ // Thumb1 is not supported yet.
+ computeTables();
+ verify(*ST.getInstrInfo());
+ return;
+ }
+
+ getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
+ .legalForCartesianProduct({s32}, {s1, s8, s16});
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32})
.minScalar(0, s32);
+ getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
+ getActionDefinitionsBuilder(G_PTRTOINT).legalFor({{s32, p0}});
+
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalFor({s32, p0})
+ .clampScalar(0, s32, s32);
+
+ // We're keeping these builders around because we'll want to add support for
+ // floating point to them.
+ auto &LoadStoreBuilder =
+ getActionDefinitionsBuilder({G_LOAD, G_STORE})
+ .legalForTypesWithMemSize({
+ {s1, p0, 8},
+ {s8, p0, 8},
+ {s16, p0, 16},
+ {s32, p0, 32},
+ {p0, p0, 32}});
+
+ if (ST.isThumb()) {
+ // FIXME: merge with the code for non-Thumb.
+ computeTables();
+ verify(*ST.getInstrInfo());
+ return;
+ }
+
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
+ getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+
if (ST.hasDivideInARMMode())
getActionDefinitionsBuilder({G_SDIV, G_UDIV})
.legalFor({s32})
@@ -101,14 +136,24 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, s32}, Libcall);
}
- getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
- .legalForCartesianProduct({s32}, {s1, s8, s16});
-
- getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
- getActionDefinitionsBuilder(G_PTRTOINT).legalFor({{s32, p0}});
-
getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}).legalFor({s32});
+ if (ST.hasV5TOps()) {
+ getActionDefinitionsBuilder(G_CTLZ)
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+ getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+ .lowerFor({s32})
+ .clampScalar(0, s32, s32);
+ } else {
+ getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+ .libcallFor({s32})
+ .clampScalar(0, s32, s32);
+ getActionDefinitionsBuilder(G_CTLZ)
+ .lowerFor({s32})
+ .clampScalar(0, s32, s32);
+ }
+
getActionDefinitionsBuilder(G_GEP).legalFor({{p0, s32}});
getActionDefinitionsBuilder(G_SELECT).legalForCartesianProduct({s32, p0},
@@ -116,20 +161,12 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
- getActionDefinitionsBuilder(G_CONSTANT)
- .legalFor({s32, p0})
- .clampScalar(0, s32, s32);
-
getActionDefinitionsBuilder(G_ICMP)
.legalForCartesianProduct({s1}, {s32, p0})
.minScalar(1, s32);
// We're keeping these builders around because we'll want to add support for
// floating point to them.
- auto &LoadStoreBuilder =
- getActionDefinitionsBuilder({G_LOAD, G_STORE})
- .legalForCartesianProduct({s1, s8, s16, s32, p0}, {p0});
-
auto &PhiBuilder =
getActionDefinitionsBuilder(G_PHI).legalFor({s32, p0}).minScalar(0, s32);
@@ -302,7 +339,8 @@ ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate,
bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
using namespace TargetOpcode;
MIRBuilder.setInstr(MI);
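
The G_CTLZ changes above rest on a simple identity: a count-leading-zeros that is defined for zero (G_CTLZ) is just a zero check wrapped around the zero-undef variant, and when the zero-undef form is the one being lowered it simply becomes G_CTLZ. A minimal standalone C++ sketch of that relationship, assuming a GCC/Clang-style __builtin_clz (undefined for zero, like G_CTLZ_ZERO_UNDEF); this is an illustration, not code from the patch:

#include <cstdint>

// Mirrors G_CTLZ semantics: defined for 0, returning the bit width. The
// guarded __builtin_clz stands in for G_CTLZ_ZERO_UNDEF, which is roughly
// what the lowerFor({s32}) rule expands G_CTLZ into on pre-v5T targets.
static unsigned ctlz32(uint32_t X) {
  return X == 0 ? 32u : static_cast<unsigned>(__builtin_clz(X));
}

On v5T and later the CLZ instruction already returns 32 for a zero input, so G_CTLZ is legal as-is and only G_CTLZ_ZERO_UNDEF needs lowering; without CLZ the roles flip and the zero-undef form goes to a libcall.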
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
index 78ab9412c04b..527bf87f1093 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/IR/Instructions.h"
@@ -29,7 +30,8 @@ public:
ARMLegalizerInfo(const ARMSubtarget &ST);
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const override;
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
private:
void setFCmpLibcallsGNU();
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index db5f28480e90..6da7430a8e51 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1027,6 +1027,18 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
CanMergeToLSMulti = CanMergeToLSDouble = false;
+  // vldm / vstm limits are 32 for S variants and 16 for D variants.
+ unsigned Limit;
+ switch (Opcode) {
+ default:
+ Limit = UINT_MAX;
+ break;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ Limit = 16;
+ break;
+ }
+
// Merge following instructions where possible.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
@@ -1036,6 +1048,8 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
unsigned Reg = MO.getReg();
if (Reg == ARM::SP || Reg == ARM::PC)
break;
+ if (Count == Limit)
+ break;
// See if the current load/store may be part of a multi load/store.
unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
@@ -1303,7 +1317,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
MBB.erase(MBBI);
return true;
@@ -1527,7 +1541,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
// Transfer implicit operands.
for (const MachineOperand &MO : MI.implicit_operands())
MIB.add(MO);
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.setMemRefs(MI.memoperands());
MBB.erase(MBBI);
return true;
@@ -1834,7 +1848,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
return M0->InsertPos < M1->InsertPos;
};
- llvm::sort(Candidates.begin(), Candidates.end(), LessThan);
+ llvm::sort(Candidates, LessThan);
// Go through list of candidates and merge.
bool Changed = false;
@@ -2172,13 +2186,12 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
bool RetVal = false;
// Sort by offset (in reverse order).
- llvm::sort(Ops.begin(), Ops.end(),
- [](const MachineInstr *LHS, const MachineInstr *RHS) {
- int LOffset = getMemoryOpOffset(*LHS);
- int ROffset = getMemoryOpOffset(*RHS);
- assert(LHS == RHS || LOffset != ROffset);
- return LOffset > ROffset;
- });
+ llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
+ int LOffset = getMemoryOpOffset(*LHS);
+ int ROffset = getMemoryOpOffset(*RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ });
// The loads / stores of the same base are in order. Scan them from first to
// last and check for the following:
@@ -2290,7 +2303,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
- MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
+ MIB.cloneMergedMemRefs({Op0, Op1});
LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumLDRDFormed;
} else {
@@ -2304,7 +2317,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
- MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
+ MIB.cloneMergedMemRefs({Op0, Op1});
LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumSTRDFormed;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
index d11fe9d5c502..df1da9d8e474 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
@@ -23,20 +23,13 @@ namespace llvm {
static bool isAESPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- switch(SecondOpcode) {
+ switch(SecondMI.getOpcode()) {
// AES encode.
case ARM::AESMC :
- return FirstOpcode == ARM::AESE ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ return FirstMI == nullptr || FirstMI->getOpcode() == ARM::AESE;
// AES decode.
case ARM::AESIMC:
- return FirstOpcode == ARM::AESD ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ return FirstMI == nullptr || FirstMI->getOpcode() == ARM::AESD;
}
return false;
@@ -46,15 +39,8 @@ static bool isAESPair(const MachineInstr *FirstMI,
static bool isLiteralsPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- // 32 bit immediate.
- if ((FirstOpcode == ARM::INSTRUCTION_LIST_END ||
- FirstOpcode == ARM::MOVi16) &&
- SecondOpcode == ARM::MOVTi16)
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == ARM::MOVi16) &&
+ SecondMI.getOpcode() == ARM::MOVTi16)
return true;
return false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
index 1e4fc6687eae..b3abd7b593a1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_ARM_ARMMACROFUSION_H
+#define LLVM_LIB_TARGET_ARM_ARMMACROFUSION_H
+
#include "llvm/CodeGen/MachineScheduler.h"
namespace llvm {
@@ -22,3 +25,5 @@ namespace llvm {
std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation();
} // llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index 9d5478b76c18..fc3258914f92 100644
--- a/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -42,6 +42,10 @@ using namespace PatternMatch;
STATISTIC(NumSMLAD , "Number of smlad instructions generated");
+static cl::opt<bool>
+DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false),
+ cl::desc("Disable the ARM Parallel DSP pass"));
+
namespace {
struct OpChain;
struct BinOpChain;
@@ -67,7 +71,7 @@ namespace {
virtual ~OpChain() = default;
void SetMemoryLocations() {
- const auto Size = MemoryLocation::UnknownSize;
+ const auto Size = LocationSize::unknown();
for (auto *V : AllValues) {
if (auto *I = dyn_cast<Instruction>(V)) {
if (I->mayWriteToMemory())
@@ -88,12 +92,15 @@ namespace {
struct BinOpChain : public OpChain {
ValueList LHS; // List of all (narrow) left hand operands.
ValueList RHS; // List of all (narrow) right hand operands.
+ bool Exchange = false;
BinOpChain(Instruction *I, ValueList &lhs, ValueList &rhs) :
OpChain(I, lhs), LHS(lhs), RHS(rhs) {
for (auto *V : RHS)
AllValues.push_back(V);
}
+
+ bool AreSymmetrical(BinOpChain *Other);
};
struct Reduction {
@@ -101,9 +108,9 @@ namespace {
// pattern matching.
Instruction *AccIntAdd; // The accumulating integer add statement,
// i.e, the reduction statement.
-
OpChainList MACCandidates; // The MAC candidates associated with
// this reduction statement.
+ PMACPairList PMACPairs;
Reduction (PHINode *P, Instruction *Acc) : Phi(P), AccIntAdd(Acc) { };
};
@@ -116,12 +123,16 @@ namespace {
Loop *L;
const DataLayout *DL;
Module *M;
+ std::map<LoadInst*, LoadInst*> LoadPairs;
+ std::map<LoadInst*, SmallVector<LoadInst*, 4>> SequentialLoads;
- bool InsertParallelMACs(Reduction &Reduction, PMACPairList &PMACPairs);
+ bool RecordSequentialLoads(BasicBlock *Header);
+ bool InsertParallelMACs(Reduction &Reduction);
bool AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, MemInstList &VecMem);
- PMACPairList CreateParallelMACPairs(OpChainList &Candidates);
+ void CreateParallelMACPairs(Reduction &R);
Instruction *CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
- Instruction *Acc, Instruction *InsertAfter);
+ Instruction *Acc, bool Exchange,
+ Instruction *InsertAfter);
/// Try to match and generate: SMLAD, SMLADX - Signed Multiply Accumulate
/// Dual performs two signed 16x16-bit multiplications. It adds the
@@ -149,6 +160,8 @@ namespace {
}
bool runOnLoop(Loop *TheLoop, LPPassManager &) override {
+ if (DisableParallelDSP)
+ return false;
L = TheLoop;
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -192,7 +205,14 @@ namespace {
LoopAccessInfo LAI(L, SE, TLI, AA, DT, LI);
bool Changes = false;
- LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n\n");
+ LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n");
+ LLVM_DEBUG(dbgs() << " - " << F.getName() << "\n\n");
+
+ if (!RecordSequentialLoads(Header)) {
+ LLVM_DEBUG(dbgs() << " - No sequential loads found.\n");
+ return false;
+ }
+
Changes = MatchSMLAD(F);
return Changes;
}
@@ -245,57 +265,14 @@ static bool IsNarrowSequence(Value *V, ValueList &VL) {
return false;
}
-// Element-by-element comparison of Value lists returning true if they are
-// instructions with the same opcode or constants with the same value.
-static bool AreSymmetrical(const ValueList &VL0,
- const ValueList &VL1) {
- if (VL0.size() != VL1.size()) {
- LLVM_DEBUG(dbgs() << "Muls are mismatching operand list lengths: "
- << VL0.size() << " != " << VL1.size() << "\n");
- return false;
- }
-
- const unsigned Pairs = VL0.size();
- LLVM_DEBUG(dbgs() << "Number of operand pairs: " << Pairs << "\n");
-
- for (unsigned i = 0; i < Pairs; ++i) {
- const Value *V0 = VL0[i];
- const Value *V1 = VL1[i];
- const auto *Inst0 = dyn_cast<Instruction>(V0);
- const auto *Inst1 = dyn_cast<Instruction>(V1);
-
- LLVM_DEBUG(dbgs() << "Pair " << i << ":\n";
- dbgs() << "mul1: "; V0->dump();
- dbgs() << "mul2: "; V1->dump());
-
- if (!Inst0 || !Inst1)
- return false;
-
- if (Inst0->isSameOperationAs(Inst1)) {
- LLVM_DEBUG(dbgs() << "OK: same operation found!\n");
- continue;
- }
-
- const APInt *C0, *C1;
- if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1))
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "OK: found symmetrical operand lists.\n");
- return true;
-}
-
template<typename MemInst>
static bool AreSequentialAccesses(MemInst *MemOp0, MemInst *MemOp1,
- MemInstList &VecMem, const DataLayout &DL,
- ScalarEvolution &SE) {
+ const DataLayout &DL, ScalarEvolution &SE) {
if (!MemOp0->isSimple() || !MemOp1->isSimple()) {
LLVM_DEBUG(dbgs() << "No, not touching volatile access\n");
return false;
}
if (isConsecutiveAccess(MemOp0, MemOp1, DL, SE)) {
- VecMem.push_back(MemOp0);
- VecMem.push_back(MemOp1);
LLVM_DEBUG(dbgs() << "OK: accesses are consecutive.\n");
return true;
}
@@ -318,82 +295,156 @@ bool ARMParallelDSP::AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1,
return false;
}
- return AreSequentialAccesses<LoadInst>(Ld0, Ld1, VecMem, *DL, *SE);
+ if (!LoadPairs.count(Ld0) || LoadPairs[Ld0] != Ld1)
+ return false;
+
+ VecMem.clear();
+ VecMem.push_back(Ld0);
+ VecMem.push_back(Ld1);
+ return true;
}
-PMACPairList
-ARMParallelDSP::CreateParallelMACPairs(OpChainList &Candidates) {
+/// Iterate through the block and record base, offset pairs of loads as well as
+/// maximal sequences of sequential loads.
+bool ARMParallelDSP::RecordSequentialLoads(BasicBlock *Header) {
+ SmallVector<LoadInst*, 8> Loads;
+ for (auto &I : *Header) {
+ auto *Ld = dyn_cast<LoadInst>(&I);
+ if (!Ld)
+ continue;
+ Loads.push_back(Ld);
+ }
+
+ std::map<LoadInst*, LoadInst*> BaseLoads;
+
+ for (auto *Ld0 : Loads) {
+ for (auto *Ld1 : Loads) {
+ if (Ld0 == Ld1)
+ continue;
+
+ if (AreSequentialAccesses<LoadInst>(Ld0, Ld1, *DL, *SE)) {
+ LoadPairs[Ld0] = Ld1;
+ if (BaseLoads.count(Ld0)) {
+ LoadInst *Base = BaseLoads[Ld0];
+ BaseLoads[Ld1] = Base;
+ SequentialLoads[Base].push_back(Ld1);
+ } else {
+ BaseLoads[Ld1] = Ld0;
+ SequentialLoads[Ld0].push_back(Ld1);
+ }
+ }
+ }
+ }
+ return LoadPairs.size() > 1;
+}
+
+void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
+ OpChainList &Candidates = R.MACCandidates;
+ PMACPairList &PMACPairs = R.PMACPairs;
const unsigned Elems = Candidates.size();
- PMACPairList PMACPairs;
if (Elems < 2)
- return PMACPairs;
+ return;
- // TODO: for now we simply try to match consecutive pairs i and i+1.
- // We can compare all elements, but then we need to compare and evaluate
- // different solutions.
- for(unsigned i=0; i<Elems-1; i+=2) {
- BinOpChain *PMul0 = static_cast<BinOpChain*>(Candidates[i].get());
- BinOpChain *PMul1 = static_cast<BinOpChain*>(Candidates[i+1].get());
- const Instruction *Mul0 = PMul0->Root;
- const Instruction *Mul1 = PMul1->Root;
+ auto CanPair = [&](BinOpChain *PMul0, BinOpChain *PMul1) {
+ if (!PMul0->AreSymmetrical(PMul1))
+ return false;
+
+ // The first elements of each vector should be loads with sexts. If we
+    // find that it's two pairs of consecutive loads, then these can be
+ // transformed into two wider loads and the users can be replaced with
+ // DSP intrinsics.
+ for (unsigned x = 0; x < PMul0->LHS.size(); x += 2) {
+ auto *Ld0 = dyn_cast<LoadInst>(PMul0->LHS[x]);
+ auto *Ld1 = dyn_cast<LoadInst>(PMul1->LHS[x]);
+ auto *Ld2 = dyn_cast<LoadInst>(PMul0->RHS[x]);
+ auto *Ld3 = dyn_cast<LoadInst>(PMul1->RHS[x]);
+
+ if (!Ld0 || !Ld1 || !Ld2 || !Ld3)
+ return false;
- if (Mul0 == Mul1)
+ LLVM_DEBUG(dbgs() << "Looking at operands " << x << ":\n"
+ << "\t Ld0: " << *Ld0 << "\n"
+ << "\t Ld1: " << *Ld1 << "\n"
+ << "and operands " << x + 2 << ":\n"
+ << "\t Ld2: " << *Ld2 << "\n"
+ << "\t Ld3: " << *Ld3 << "\n");
+
+ if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
+ if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ return true;
+ } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n");
+ PMul1->Exchange = true;
+ PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ return true;
+ }
+ } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
+ AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n");
+ LLVM_DEBUG(dbgs() << " and swapping muls\n");
+ PMul0->Exchange = true;
+ // Only the second operand can be exchanged, so swap the muls.
+ PMACPairs.push_back(std::make_pair(PMul1, PMul0));
+ return true;
+ }
+ }
+ return false;
+ };
+
+ SmallPtrSet<const Instruction*, 4> Paired;
+ for (unsigned i = 0; i < Elems; ++i) {
+ BinOpChain *PMul0 = static_cast<BinOpChain*>(Candidates[i].get());
+ if (Paired.count(PMul0->Root))
continue;
- LLVM_DEBUG(dbgs() << "\nCheck parallel muls:\n";
- dbgs() << "- "; Mul0->dump();
- dbgs() << "- "; Mul1->dump());
+ for (unsigned j = 0; j < Elems; ++j) {
+ if (i == j)
+ continue;
- const ValueList &Mul0_LHS = PMul0->LHS;
- const ValueList &Mul0_RHS = PMul0->RHS;
- const ValueList &Mul1_LHS = PMul1->LHS;
- const ValueList &Mul1_RHS = PMul1->RHS;
+ BinOpChain *PMul1 = static_cast<BinOpChain*>(Candidates[j].get());
+ if (Paired.count(PMul1->Root))
+ continue;
- if (!AreSymmetrical(Mul0_LHS, Mul1_LHS) ||
- !AreSymmetrical(Mul0_RHS, Mul1_RHS))
- continue;
+ const Instruction *Mul0 = PMul0->Root;
+ const Instruction *Mul1 = PMul1->Root;
+ if (Mul0 == Mul1)
+ continue;
- LLVM_DEBUG(dbgs() << "OK: mul operands list match:\n");
- // The first elements of each vector should be loads with sexts. If we find
- // that its two pairs of consecutive loads, then these can be transformed
- // into two wider loads and the users can be replaced with DSP
- // intrinsics.
- for (unsigned x = 0; x < Mul0_LHS.size(); x += 2) {
- auto *Ld0 = dyn_cast<LoadInst>(Mul0_LHS[x]);
- auto *Ld1 = dyn_cast<LoadInst>(Mul1_LHS[x]);
- auto *Ld2 = dyn_cast<LoadInst>(Mul0_RHS[x]);
- auto *Ld3 = dyn_cast<LoadInst>(Mul1_RHS[x]);
-
- LLVM_DEBUG(dbgs() << "Looking at operands " << x << ":\n";
- dbgs() << "\t mul1: "; Mul0_LHS[x]->dump();
- dbgs() << "\t mul2: "; Mul1_LHS[x]->dump();
- dbgs() << "and operands " << x + 2 << ":\n";
- dbgs() << "\t mul1: "; Mul0_RHS[x]->dump();
- dbgs() << "\t mul2: "; Mul1_RHS[x]->dump());
-
- if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd) &&
- AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
- LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
- PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ assert(PMul0 != PMul1 && "expected different chains");
+
+ LLVM_DEBUG(dbgs() << "\nCheck parallel muls:\n";
+ dbgs() << "- "; Mul0->dump();
+ dbgs() << "- "; Mul1->dump());
+
+ LLVM_DEBUG(dbgs() << "OK: mul operands list match:\n");
+ if (CanPair(PMul0, PMul1)) {
+ Paired.insert(Mul0);
+ Paired.insert(Mul1);
+ break;
}
}
}
- return PMACPairs;
}
-bool ARMParallelDSP::InsertParallelMACs(Reduction &Reduction,
- PMACPairList &PMACPairs) {
+bool ARMParallelDSP::InsertParallelMACs(Reduction &Reduction) {
Instruction *Acc = Reduction.Phi;
Instruction *InsertAfter = Reduction.AccIntAdd;
- for (auto &Pair : PMACPairs) {
+ for (auto &Pair : Reduction.PMACPairs) {
+ BinOpChain *PMul0 = Pair.first;
+ BinOpChain *PMul1 = Pair.second;
LLVM_DEBUG(dbgs() << "Found parallel MACs!!\n";
- dbgs() << "- "; Pair.first->Root->dump();
- dbgs() << "- "; Pair.second->Root->dump());
- auto *VecLd0 = cast<LoadInst>(Pair.first->VecLd[0]);
- auto *VecLd1 = cast<LoadInst>(Pair.second->VecLd[0]);
- Acc = CreateSMLADCall(VecLd0, VecLd1, Acc, InsertAfter);
+ dbgs() << "- "; PMul0->Root->dump();
+ dbgs() << "- "; PMul1->Root->dump());
+
+ auto *VecLd0 = cast<LoadInst>(PMul0->VecLd[0]);
+ auto *VecLd1 = cast<LoadInst>(PMul1->VecLd[0]);
+ Acc = CreateSMLADCall(VecLd0, VecLd1, Acc, PMul1->Exchange, InsertAfter);
InsertAfter = Acc;
}
@@ -420,7 +471,7 @@ static void MatchReductions(Function &F, Loop *TheLoop, BasicBlock *Header,
for (PHINode &Phi : Header->phis()) {
const auto *Ty = Phi.getType();
- if (!Ty->isIntegerTy(32))
+ if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64))
continue;
const bool IsReduction =
@@ -447,10 +498,11 @@ static void MatchReductions(Function &F, Loop *TheLoop, BasicBlock *Header,
}
static void AddMACCandidate(OpChainList &Candidates,
- const Instruction *Acc,
- Value *MulOp0, Value *MulOp1, int MulOpNum) {
- Instruction *Mul = dyn_cast<Instruction>(Acc->getOperand(MulOpNum));
+ Instruction *Mul,
+ Value *MulOp0, Value *MulOp1) {
LLVM_DEBUG(dbgs() << "OK, found acc mul:\t"; Mul->dump());
+ assert(Mul->getOpcode() == Instruction::Mul &&
+ "expected mul instruction");
ValueList LHS;
ValueList RHS;
if (IsNarrowSequence<16>(MulOp0, LHS) &&
@@ -462,31 +514,38 @@ static void AddMACCandidate(OpChainList &Candidates,
static void MatchParallelMACSequences(Reduction &R,
OpChainList &Candidates) {
- const Instruction *Acc = R.AccIntAdd;
- Value *A, *MulOp0, *MulOp1;
- LLVM_DEBUG(dbgs() << "\n- Analysing:\t"; Acc->dump());
-
- // Pattern 1: the accumulator is the RHS of the mul.
- while(match(Acc, m_Add(m_Mul(m_Value(MulOp0), m_Value(MulOp1)),
- m_Value(A)))){
- AddMACCandidate(Candidates, Acc, MulOp0, MulOp1, 0);
- Acc = dyn_cast<Instruction>(A);
- }
- // Pattern 2: the accumulator is the LHS of the mul.
- while(match(Acc, m_Add(m_Value(A),
- m_Mul(m_Value(MulOp0), m_Value(MulOp1))))) {
- AddMACCandidate(Candidates, Acc, MulOp0, MulOp1, 1);
- Acc = dyn_cast<Instruction>(A);
- }
+ Instruction *Acc = R.AccIntAdd;
+ LLVM_DEBUG(dbgs() << "\n- Analysing:\t" << *Acc);
- // The last mul in the chain has a slightly different pattern:
- // the mul is the first operand
- if (match(Acc, m_Add(m_Mul(m_Value(MulOp0), m_Value(MulOp1)), m_Value(A))))
- AddMACCandidate(Candidates, Acc, MulOp0, MulOp1, 0);
+ // Returns false to signal the search should be stopped.
+ std::function<bool(Value*)> Match =
+ [&Candidates, &Match](Value *V) -> bool {
- // Because we start at the bottom of the chain, and we work our way up,
- // the muls are added in reverse program order to the list.
- std::reverse(Candidates.begin(), Candidates.end());
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ if (Match(I->getOperand(0)) || (Match(I->getOperand(1))))
+ return true;
+ break;
+ case Instruction::Mul: {
+ Value *MulOp0 = I->getOperand(0);
+ Value *MulOp1 = I->getOperand(1);
+ if (isa<SExtInst>(MulOp0) && isa<SExtInst>(MulOp1))
+ AddMACCandidate(Candidates, I, MulOp0, MulOp1);
+ return false;
+ }
+ case Instruction::SExt:
+ return Match(I->getOperand(0));
+ }
+ return false;
+ };
+
+ while (Match (Acc));
+ LLVM_DEBUG(dbgs() << "Finished matching MAC sequences, found "
+ << Candidates.size() << " candidates.\n");
}
// Collects all instructions that are not part of the MAC chains, which is the
@@ -621,45 +680,100 @@ bool ARMParallelDSP::MatchSMLAD(Function &F) {
for (auto &R : Reductions) {
if (AreAliased(AA, Reads, Writes, R.MACCandidates))
return false;
- PMACPairList PMACPairs = CreateParallelMACPairs(R.MACCandidates);
- Changed |= InsertParallelMACs(R, PMACPairs);
+ CreateParallelMACPairs(R);
+ Changed |= InsertParallelMACs(R);
}
LLVM_DEBUG(if (Changed) dbgs() << "Header block:\n"; Header->dump(););
return Changed;
}
-static void CreateLoadIns(IRBuilder<NoFolder> &IRB, Instruction *Acc,
- LoadInst **VecLd) {
- const Type *AccTy = Acc->getType();
- const unsigned AddrSpace = (*VecLd)->getPointerAddressSpace();
+static LoadInst *CreateLoadIns(IRBuilder<NoFolder> &IRB, LoadInst &BaseLoad,
+ const Type *LoadTy) {
+ const unsigned AddrSpace = BaseLoad.getPointerAddressSpace();
- Value *VecPtr = IRB.CreateBitCast((*VecLd)->getPointerOperand(),
- AccTy->getPointerTo(AddrSpace));
- *VecLd = IRB.CreateAlignedLoad(VecPtr, (*VecLd)->getAlignment());
+ Value *VecPtr = IRB.CreateBitCast(BaseLoad.getPointerOperand(),
+ LoadTy->getPointerTo(AddrSpace));
+ return IRB.CreateAlignedLoad(VecPtr, BaseLoad.getAlignment());
}
Instruction *ARMParallelDSP::CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
- Instruction *Acc,
+ Instruction *Acc, bool Exchange,
Instruction *InsertAfter) {
- LLVM_DEBUG(dbgs() << "Create SMLAD intrinsic using:\n";
- dbgs() << "- "; VecLd0->dump();
- dbgs() << "- "; VecLd1->dump();
- dbgs() << "- "; Acc->dump());
+ LLVM_DEBUG(dbgs() << "Create SMLAD intrinsic using:\n"
+ << "- " << *VecLd0 << "\n"
+ << "- " << *VecLd1 << "\n"
+ << "- " << *Acc << "\n"
+ << "Exchange: " << Exchange << "\n");
IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
++BasicBlock::iterator(InsertAfter));
// Replace the reduction chain with an intrinsic call
- CreateLoadIns(Builder, Acc, &VecLd0);
- CreateLoadIns(Builder, Acc, &VecLd1);
- Value* Args[] = { VecLd0, VecLd1, Acc };
- Function *SMLAD = Intrinsic::getDeclaration(M, Intrinsic::arm_smlad);
+ const Type *Ty = IntegerType::get(M->getContext(), 32);
+ LoadInst *NewLd0 = CreateLoadIns(Builder, VecLd0[0], Ty);
+ LoadInst *NewLd1 = CreateLoadIns(Builder, VecLd1[0], Ty);
+ Value* Args[] = { NewLd0, NewLd1, Acc };
+ Function *SMLAD = nullptr;
+ if (Exchange)
+ SMLAD = Acc->getType()->isIntegerTy(32) ?
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smladx) :
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlaldx);
+ else
+ SMLAD = Acc->getType()->isIntegerTy(32) ?
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlad) :
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlald);
CallInst *Call = Builder.CreateCall(SMLAD, Args);
NumSMLAD++;
return Call;
}
+// Compare the value lists in Other to this chain.
+bool BinOpChain::AreSymmetrical(BinOpChain *Other) {
+ // Element-by-element comparison of Value lists returning true if they are
+ // instructions with the same opcode or constants with the same value.
+ auto CompareValueList = [](const ValueList &VL0,
+ const ValueList &VL1) {
+ if (VL0.size() != VL1.size()) {
+ LLVM_DEBUG(dbgs() << "Muls are mismatching operand list lengths: "
+ << VL0.size() << " != " << VL1.size() << "\n");
+ return false;
+ }
+
+ const unsigned Pairs = VL0.size();
+ LLVM_DEBUG(dbgs() << "Number of operand pairs: " << Pairs << "\n");
+
+ for (unsigned i = 0; i < Pairs; ++i) {
+ const Value *V0 = VL0[i];
+ const Value *V1 = VL1[i];
+ const auto *Inst0 = dyn_cast<Instruction>(V0);
+ const auto *Inst1 = dyn_cast<Instruction>(V1);
+
+ LLVM_DEBUG(dbgs() << "Pair " << i << ":\n";
+ dbgs() << "mul1: "; V0->dump();
+ dbgs() << "mul2: "; V1->dump());
+
+ if (!Inst0 || !Inst1)
+ return false;
+
+ if (Inst0->isSameOperationAs(Inst1)) {
+ LLVM_DEBUG(dbgs() << "OK: same operation found!\n");
+ continue;
+ }
+
+ const APInt *C0, *C1;
+ if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1))
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "OK: found symmetrical operand lists.\n");
+ return true;
+ };
+
+ return CompareValueList(LHS, Other->LHS) &&
+ CompareValueList(RHS, Other->RHS);
+}
+
Pass *llvm::createARMParallelDSPPass() {
return new ARMParallelDSP();
}
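
For orientation, the loops this pass targets are 16-bit dot products: pairs of adjacent sign-extended i16 loads feeding a multiply-accumulate chain. The changes above extend the matching to exchanged operands (SMLADX/SMLALDX) and to 64-bit accumulators (SMLALD/SMLALDX). A rough C++ illustration of such a loop, not taken from the patch:

// Each iteration does two sexted i16 multiplies accumulated into an i32 sum.
// The pass can turn the two adjacent loads per array into a single 32-bit
// load and replace the two multiply-adds with one SMLAD intrinsic call.
int dot16(const short *a, const short *b, int n) {
  int acc = 0;
  for (int i = 0; i < n; i += 2)
    acc += a[i] * b[i] + a[i + 1] * b[i + 1];
  return acc;
}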
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 0e16d6bcfe2b..4f28f2dafc70 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -234,6 +234,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_GEP:
case G_INTTOPTR:
case G_PTRTOINT:
+ case G_CTLZ:
// FIXME: We're abusing the fact that everything lives in a GPR for now; in
// the real world we would use different mappings.
OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index f42cbbda1b71..b1d0761e3231 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -188,8 +188,10 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
assert(hasV6T2Ops() || !hasThumb2());
// Execute only support requires movt support
- if (genExecuteOnly())
- assert(hasV8MBaselineOps() && !NoMovt && "Cannot generate execute-only code for this target");
+ if (genExecuteOnly()) {
+ NoMovt = false;
+ assert(hasV8MBaselineOps() && "Cannot generate execute-only code for this target");
+ }
// Keep a pointer to static instruction cost data for the specified CPU.
SchedModel = getSchedModelForCPU(CPUString);
@@ -287,7 +289,13 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexR7:
case CortexM3:
case CortexR52:
- case ExynosM1:
+ break;
+ case Exynos:
+ LdStMultipleTiming = SingleIssuePlusExtras;
+ MaxInterleaveFactor = 4;
+ if (!isThumb())
+ PrefLoopAlignment = 3;
+ break;
case Kryo:
break;
case Krait:
@@ -370,7 +378,8 @@ bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
// For general targets, the prologue can grow when VFPs are allocated with
// stride 4 (more vpush instructions). But WatchOS uses a compact unwind
// format which it's more important to get right.
- return isTargetWatchABI() || (isSwift() && !MF.getFunction().optForMinSize());
+ return isTargetWatchABI() ||
+ (useWideStrideVFP() && !MF.getFunction().optForMinSize());
}
bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 74aee9a8ed38..11841b4467a2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -68,7 +68,7 @@ protected:
CortexR5,
CortexR52,
CortexR7,
- ExynosM1,
+ Exynos,
Krait,
Kryo,
Swift
@@ -106,6 +106,7 @@ protected:
ARMv82a,
ARMv83a,
ARMv84a,
+ ARMv85a,
ARMv8a,
ARMv8mBaseline,
ARMv8mMainline,
@@ -153,6 +154,7 @@ protected:
bool HasV8_2aOps = false;
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
+ bool HasV8_5aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
@@ -227,6 +229,9 @@ protected:
/// HasFullFP16 - True if subtarget supports half-precision FP operations
bool HasFullFP16 = false;
+ /// HasFP16FML - True if subtarget supports half-precision FP fml operations
+ bool HasFP16FML = false;
+
/// HasD16 - True if subtarget is limited to 16 double precision
/// FP registers for VFPv3.
bool HasD16 = false;
@@ -353,6 +358,9 @@ protected:
/// If true, loading into a D subregister will be penalized.
bool SlowLoadDSubregister = false;
+ /// If true, use a wider stride when allocating VFP registers.
+ bool UseWideStrideVFP = false;
+
/// If true, the AGU and NEON/FPU units are multiplexed.
bool HasMuxedUnits = false;
@@ -408,6 +416,9 @@ protected:
/// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
bool UseSjLjEH = false;
+ /// Has speculation barrier
+ bool HasSB = false;
+
/// Implicitly convert an instruction to a different one if its immediates
/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
bool NegativeImmediates = true;
@@ -432,6 +443,9 @@ protected:
/// operand cycle returned by the itinerary data for pre-ISel operands.
int PreISelOperandLatencyAdjustment = 2;
+ /// What alignment is preferred for loop bodies, in log2(bytes).
+ unsigned PrefLoopAlignment = 0;
+
/// IsLittle - The target is Little Endian
bool IsLittle;
@@ -529,6 +543,7 @@ public:
bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
+ bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
@@ -596,6 +611,7 @@ public:
bool hasVMLxHazards() const { return HasVMLxHazards; }
bool hasSlowOddRegister() const { return SlowOddRegister; }
bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
+ bool useWideStrideVFP() const { return UseWideStrideVFP; }
bool hasMuxedUnits() const { return HasMuxedUnits; }
bool dontWidenVMOVS() const { return DontWidenVMOVS; }
bool useSplatVFPToNeon() const { return SplatVFPToNeon; }
@@ -612,12 +628,14 @@ public:
bool hasDSP() const { return HasDSP; }
bool useNaClTrap() const { return UseNaClTrap; }
bool useSjLjEH() const { return UseSjLjEH; }
+ bool hasSB() const { return HasSB; }
bool genLongCalls() const { return GenLongCalls; }
bool genExecuteOnly() const { return GenExecuteOnly; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
bool hasFullFP16() const { return HasFullFP16; }
+ bool hasFP16FML() const { return HasFP16FML; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
@@ -796,6 +814,10 @@ public:
bool allowPositionIndependentMovt() const {
return isROPI() || !isTargetELF();
}
+
+ unsigned getPrefLoopAlignment() const {
+ return PrefLoopAlignment;
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 519f789fc215..ec02c840d5e1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -194,12 +194,6 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
/// Create an ARM architecture model.
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
@@ -210,7 +204,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool isLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TargetABI(computeTargetABI(TT, CPU, Options)),
TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) {
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index d0620761ea9c..9c13359cba71 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -32,7 +32,8 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
const ARMBaseTargetMachine &ARM_TM = static_cast<const ARMBaseTargetMachine &>(TM);
bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS;
- // genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
+ bool genExecuteOnly =
+ ARM_TM.getMCSubtargetInfo()->hasFeature(ARM::FeatureExecuteOnly);
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(isAAPCS_ABI);
@@ -40,6 +41,17 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
if (isAAPCS_ABI) {
LSDASection = nullptr;
}
+
+ // Make code section unreadable when in execute-only mode
+ if (genExecuteOnly) {
+ unsigned Type = ELF::SHT_PROGBITS;
+ unsigned Flags =
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE;
+ // Since we cannot modify flags for an existing section, we create a new
+ // section with the right flags, and use 0 as the unique ID for
+ // execute-only text
+ TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U);
+ }
}
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 94f9cefe429c..f72bb8632eb7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -77,8 +77,8 @@ int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 1;
return ST->hasV6T2Ops() ? 2 : 3;
}
- // Thumb1.
- if (SImmVal >= 0 && SImmVal < 256)
+  // Thumb1: any i8 imm costs 1.
+ if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
return 1;
if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
return 2;
@@ -400,10 +400,29 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- // We only handle costs of reverse and select shuffles for now.
- if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select)
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ if (Kind == TTI::SK_Broadcast) {
+ static const CostTblEntry NEONDupTbl[] = {
+ // VDUP handles these cases.
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
+ LT.second))
+ return LT.first * Entry->Cost;
+
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
if (Kind == TTI::SK_Reverse) {
static const CostTblEntry NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a
@@ -412,6 +431,8 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
@@ -542,14 +563,17 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN doesn't support vector types of i64/f64 element.
bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
- if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
+ if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
+ !UseMaskForCond && !UseMaskForGaps) {
unsigned NumElts = VecTy->getVectorNumElements();
auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
@@ -562,7 +586,8 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index e0cd2d8e26a6..2dd143d48a15 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -57,7 +57,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const FeatureBitset InlineFeatureWhitelist = {
ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
- ARM::FeatureFullFP16, ARM::FeatureHWDivThumb,
+ ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
@@ -169,7 +169,9 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index a5fbbbf26be9..3832b0112b87 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARMFeatures.h"
+#include "InstPrinter/ARMInstPrinter.h"
#include "Utils/ARMBaseInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -631,6 +632,8 @@ public:
void ReportNearMisses(SmallVectorImpl<NearMissInfo> &NearMisses, SMLoc IDLoc,
OperandVector &Operands);
+ void doBeforeLabelEmit(MCSymbol *Symbol) override;
+
void onLabelParsed(MCSymbol *Symbol) override;
};
@@ -3203,17 +3206,26 @@ public:
} // end anonymous namespace.
void ARMOperand::print(raw_ostream &OS) const {
+ auto RegName = [](unsigned Reg) {
+ if (Reg)
+ return ARMInstPrinter::getRegisterName(Reg);
+ else
+ return "noreg";
+ };
+
switch (Kind) {
case k_CondCode:
OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
break;
case k_CCOut:
- OS << "<ccout " << getReg() << ">";
+ OS << "<ccout " << RegName(getReg()) << ">";
break;
case k_ITCondMask: {
static const char *const MaskStr[] = {
- "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)",
- "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)"
+ "(invalid)", "(teee)", "(tee)", "(teet)",
+ "(te)", "(tete)", "(tet)", "(tett)",
+ "(t)", "(ttee)", "(tte)", "(ttet)",
+ "(tt)", "(ttte)", "(ttt)", "(tttt)"
};
assert((ITMask.Mask & 0xf) == ITMask.Mask);
OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
@@ -3247,13 +3259,25 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<ARM_TSB::" << TraceSyncBOptToString(getTraceSyncBarrierOpt()) << ">";
break;
case k_Memory:
- OS << "<memory "
- << " base:" << Memory.BaseRegNum;
+ OS << "<memory";
+ if (Memory.BaseRegNum)
+ OS << " base:" << RegName(Memory.BaseRegNum);
+ if (Memory.OffsetImm)
+ OS << " offset-imm:" << *Memory.OffsetImm;
+ if (Memory.OffsetRegNum)
+ OS << " offset-reg:" << (Memory.isNegative ? "-" : "")
+ << RegName(Memory.OffsetRegNum);
+ if (Memory.ShiftType != ARM_AM::no_shift) {
+ OS << " shift-type:" << ARM_AM::getShiftOpcStr(Memory.ShiftType);
+ OS << " shift-imm:" << Memory.ShiftImm;
+ }
+ if (Memory.Alignment)
+ OS << " alignment:" << Memory.Alignment;
OS << ">";
break;
case k_PostIndexRegister:
OS << "post-idx register " << (PostIdxReg.isAdd ? "" : "-")
- << PostIdxReg.RegNum;
+ << RegName(PostIdxReg.RegNum);
if (PostIdxReg.ShiftTy != ARM_AM::no_shift)
OS << ARM_AM::getShiftOpcStr(PostIdxReg.ShiftTy) << " "
<< PostIdxReg.ShiftImm;
@@ -3269,23 +3293,21 @@ void ARMOperand::print(raw_ostream &OS) const {
break;
}
case k_Register:
- OS << "<register " << getReg() << ">";
+ OS << "<register " << RegName(getReg()) << ">";
break;
case k_ShifterImmediate:
OS << "<shift " << (ShifterImm.isASR ? "asr" : "lsl")
<< " #" << ShifterImm.Imm << ">";
break;
case k_ShiftedRegister:
- OS << "<so_reg_reg "
- << RegShiftedReg.SrcReg << " "
- << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
- << " " << RegShiftedReg.ShiftReg << ">";
+ OS << "<so_reg_reg " << RegName(RegShiftedReg.SrcReg) << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy) << " "
+ << RegName(RegShiftedReg.ShiftReg) << ">";
break;
case k_ShiftedImmediate:
- OS << "<so_reg_imm "
- << RegShiftedImm.SrcReg << " "
- << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
- << " #" << RegShiftedImm.ShiftImm << ">";
+ OS << "<so_reg_imm " << RegName(RegShiftedImm.SrcReg) << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy) << " #"
+ << RegShiftedImm.ShiftImm << ">";
break;
case k_RotateImmediate:
OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
@@ -3309,7 +3331,7 @@ void ARMOperand::print(raw_ostream &OS) const {
const SmallVectorImpl<unsigned> &RegList = getRegList();
for (SmallVectorImpl<unsigned>::const_iterator
I = RegList.begin(), E = RegList.end(); I != E; ) {
- OS << *I;
+ OS << RegName(*I);
if (++I < E) OS << ", ";
}
@@ -3318,15 +3340,15 @@ void ARMOperand::print(raw_ostream &OS) const {
}
case k_VectorList:
OS << "<vector_list " << VectorList.Count << " * "
- << VectorList.RegNum << ">";
+ << RegName(VectorList.RegNum) << ">";
break;
case k_VectorListAllLanes:
OS << "<vector_list(all lanes) " << VectorList.Count << " * "
- << VectorList.RegNum << ">";
+ << RegName(VectorList.RegNum) << ">";
break;
case k_VectorListIndexed:
OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
- << VectorList.Count << " * " << VectorList.RegNum << ">";
+ << VectorList.Count << " * " << RegName(VectorList.RegNum) << ">";
break;
case k_Token:
OS << "'" << getToken() << "'";
@@ -5626,7 +5648,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
Mnemonic == "bxns" || Mnemonic == "blxns" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
- Mnemonic == "vcmla" || Mnemonic == "vcadd")
+ Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
+ Mnemonic == "vfmal" || Mnemonic == "vfmsl")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -5716,7 +5739,10 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
(FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
Mnemonic == "vmovx" || Mnemonic == "vins" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
- Mnemonic == "vcmla" || Mnemonic == "vcadd") {
+ Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
+ Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
+ Mnemonic == "sb" || Mnemonic == "ssbb" ||
+ Mnemonic == "pssbb") {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
@@ -6819,6 +6845,26 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"code specified");
break;
}
+ case ARM::DSB:
+ case ARM::t2DSB: {
+
+ if (Inst.getNumOperands() < 2)
+ break;
+
+ unsigned Option = Inst.getOperand(0).getImm();
+ unsigned Pred = Inst.getOperand(1).getImm();
+
+ // SSBB and PSSBB (DSB #0|#4) are not predicable (pred must be AL).
+ if (Option == 0 && Pred != ARMCC::AL)
+ return Error(Operands[1]->getStartLoc(),
+ "instruction 'ssbb' is not predicable, but condition code "
+ "specified");
+ if (Option == 4 && Pred != ARMCC::AL)
+ return Error(Operands[1]->getStartLoc(),
+ "instruction 'pssbb' is not predicable, but condition code "
+ "specified");
+ break;
+ }
case ARM::VMOVRRS: {
// Source registers must be sequential.
const unsigned Sm = MRI->getEncodingValue(Inst.getOperand(2).getReg());
@@ -6837,6 +6883,15 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"destination operands must be sequential");
break;
}
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA: {
+ ARMOperand &Op = static_cast<ARMOperand&>(*Operands[3]);
+ auto &RegList = Op.getRegList();
+ if (RegList.size() < 1 || RegList.size() > 16)
+ return Error(Operands[3]->getStartLoc(),
+ "list of registers must be at least 1 and at most 16");
+ break;
+ }
}
return false;
@@ -9122,33 +9177,9 @@ bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const {
// Any arithmetic instruction which writes to the PC also terminates the IT
// block.
- for (unsigned OpIdx = 0; OpIdx < MCID.getNumDefs(); ++OpIdx) {
- MCOperand &Op = Inst.getOperand(OpIdx);
- if (Op.isReg() && Op.getReg() == ARM::PC)
- return true;
- }
-
- if (MCID.hasImplicitDefOfPhysReg(ARM::PC, MRI))
+ if (MCID.hasDefOfPhysReg(Inst, ARM::PC, *MRI))
return true;
- // Instructions with variable operand lists, which write to the variable
- // operands. We only care about Thumb instructions here, as ARM instructions
- // obviously can't be in an IT block.
- switch (Inst.getOpcode()) {
- case ARM::tLDMIA:
- case ARM::t2LDMIA:
- case ARM::t2LDMIA_UPD:
- case ARM::t2LDMDB:
- case ARM::t2LDMDB_UPD:
- if (listContainsReg(Inst, 3, ARM::PC))
- return true;
- break;
- case ARM::tPOP:
- if (listContainsReg(Inst, 2, ARM::PC))
- return true;
- break;
- }
-
return false;
}
@@ -9255,6 +9286,10 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (MatchResult) {
case Match_Success:
+ LLVM_DEBUG(dbgs() << "Parsed as: ";
+ Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
+ dbgs() << "\n");
+
// Context sensitive operand constraints aren't handled by the matcher,
// so check them here.
if (validateInstruction(Inst, Operands)) {
@@ -9272,7 +9307,9 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// individual transformations can chain off each other. E.g.,
// tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
while (processInstruction(Inst, Operands, Out))
- ;
+ LLVM_DEBUG(dbgs() << "Changed to: ";
+ Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
+ dbgs() << "\n");
// Only after the instruction is fully processed, we can validate it
if (wasInITBlock && hasV8Ops() && isThumb() &&
@@ -9441,10 +9478,13 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
return false;
}
-void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
+void ARMAsmParser::doBeforeLabelEmit(MCSymbol *Symbol) {
// We need to flush the current implicit IT block on a label, because it is
// not legal to branch into an IT block.
flushPendingInstructions(getStreamer());
+}
+
+void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
if (NextSymbolIsThumb) {
getParser().getStreamer().EmitThumbFunc(Symbol);
NextSymbolIsThumb = false;
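
Among the parser changes, the DSB/t2DSB validation encodes one small rule: barrier options #0 and #4 are the ssbb and pssbb aliases, which only exist in the unconditional (AL) form. Restated as a standalone helper purely for illustration (the helper name and parameters are invented, not part of the parser):

// Returns the diagnostic to report, or nullptr if the operands are fine.
// Option is the DSB barrier option immediate, Pred the parsed condition
// code, and AL the "always" condition value.
static const char *checkBarrierPredicate(unsigned Option, unsigned Pred,
                                         unsigned AL) {
  if (Option == 0 && Pred != AL)
    return "instruction 'ssbb' is not predicable, but condition code specified";
  if (Option == 4 && Pred != AL)
    return "instruction 'pssbb' is not predicable, but condition code specified";
  return nullptr;
}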
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index bfc32073ba18..2f84719c4c4f 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -273,6 +273,21 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
case ARM::t2TSB:
O << "\ttsb\tcsync";
return;
+ case ARM::t2DSB:
+ switch (MI->getOperand(0).getImm()) {
+ default:
+ if (!printAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
+ break;
+ case 0:
+ O << "\tssbb";
+ break;
+ case 4:
+ O << "\tpssbb";
+ break;
+ }
+ printAnnotation(O, Annot);
+ return;
}
if (!printAliasInstr(MI, STI, O))
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index f472b2154314..e1ea5964cf67 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -627,27 +628,22 @@ namespace ARM_AM {
//
inline float getFPImmFloat(unsigned Imm) {
// We expect an 8-bit binary encoding of a floating-point number here.
- union {
- uint32_t I;
- float F;
- } FPUnion;
uint8_t Sign = (Imm >> 7) & 0x1;
uint8_t Exp = (Imm >> 4) & 0x7;
uint8_t Mantissa = Imm & 0xf;
- // 8-bit FP iEEEE Float Encoding
+ // 8-bit FP IEEE Float Encoding
// abcd efgh aBbbbbbc defgh000 00000000 00000000
//
// where B = NOT(b);
-
- FPUnion.I = 0;
- FPUnion.I |= Sign << 31;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
- FPUnion.I |= (Exp & 0x3) << 23;
- FPUnion.I |= Mantissa << 19;
- return FPUnion.F;
+ uint32_t I = 0;
+ I |= Sign << 31;
+ I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ I |= (Exp & 0x3) << 23;
+ I |= Mantissa << 19;
+ return bit_cast<float>(I);
}
/// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
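
The rewritten getFPImmFloat keeps the same bit layout and only swaps the union type-pun for bit_cast. A self-contained C++ sketch of the same decode, using std::memcpy in place of llvm::bit_cast so it compiles without LLVM headers; this is an illustration, not the library code:

#include <cstdint>
#include <cstring>

// Expand an 8-bit VFP immediate abcdefgh into aBbbbbbc defgh000 ... with
// B = NOT(b); for example, imm8 0x70 decodes to 1.0f.
static float decodeVFPImm8(unsigned Imm) {
  uint32_t Sign = (Imm >> 7) & 0x1;
  uint32_t Exp = (Imm >> 4) & 0x7;
  uint32_t Mantissa = Imm & 0xf;
  uint32_t I = 0;
  I |= Sign << 31;
  I |= ((Exp & 0x4) != 0 ? 0u : 1u) << 30;
  I |= ((Exp & 0x4) != 0 ? 0x1fu : 0u) << 25;
  I |= (Exp & 0x3) << 23;
  I |= Mantissa << 19;
  float F;
  std::memcpy(&F, &I, sizeof(F)); // same effect as bit_cast<float>(I)
  return F;
}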
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index f524a0081301..c2a07d4ddcef 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -373,6 +373,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
// interfere with checking valid expressions.
if (const MCSymbolRefExpr *A = Target.getSymA()) {
if (A->hasSubsectionsViaSymbols() && Asm.isThumbFunc(&A->getSymbol()) &&
+ A->getSymbol().isExternal() &&
(Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 ||
Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 ||
Kind == ARM::fixup_t2_movt_hi16))
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index beeb5dec4baf..33c32d5464af 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -248,6 +248,11 @@ namespace ARMII {
/// just that part of the flag set.
MO_OPTION_MASK = 0x3,
+ /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
+  /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// stub symbols on windows.
+ MO_COFFSTUB = 0x4,
+
/// MO_GOT - On a symbol operand, this represents a GOT relative relocation.
MO_GOT = 0x8,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 7d04c73fb3f2..b8ba7584911b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
@@ -40,6 +41,8 @@ namespace {
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
+
+ void addTargetSectionFlags(MCContext &Ctx, MCSectionELF &Sec) override;
};
} // end anonymous namespace
@@ -236,6 +239,21 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
}
+void ARMELFObjectWriter::addTargetSectionFlags(MCContext &Ctx,
+ MCSectionELF &Sec) {
+ // The mix of execute-only and non-execute-only at link time is
+ // non-execute-only. To avoid the empty implicitly created .text
+ // section from making the whole .text section non-execute-only, we
+ // mark it execute-only if it is empty and there is at least one
+ // execute-only section in the object.
+ MCSectionELF *TextSection =
+ static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection());
+ if (Sec.getKind().isExecuteOnly() && !TextSection->hasInstructions() &&
+ !TextSection->hasData()) {
+ TextSection->setFlags(TextSection->getFlags() | ELF::SHF_ARM_PURECODE);
+ }
+}
+
std::unique_ptr<MCObjectTargetWriter>
llvm::createARMELFObjectWriter(uint8_t OSABI) {
return llvm::make_unique<ARMELFObjectWriter>(OSABI);
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 3373d691db50..d3744fffac32 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -465,6 +465,11 @@ public:
void emitPad(int64_t Offset);
void emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector);
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
+ void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
+ SMLoc Loc) override {
+ EmitDataMappingSymbol();
+ MCObjectStreamer::emitFill(NumBytes, FillValue, Loc);
+ }
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo);
@@ -861,6 +866,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::ArchKind::ARMV8_2A:
case ARM::ArchKind::ARMV8_3A:
case ARM::ArchKind::ARMV8_4A:
+ case ARM::ArchKind::ARMV8_5A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -1071,7 +1077,7 @@ void ARMTargetELFStreamer::finishAttributeSection() {
if (Contents.empty())
return;
- llvm::sort(Contents.begin(), Contents.end(), AttributeItem::LessTag);
+ llvm::sort(Contents, AttributeItem::LessTag);
ARMELFStreamer &Streamer = getStreamer();
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 0cef683778e5..3ee63ac374b3 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -31,6 +31,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
SupportsDebugInformation = true;
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
+
// Exceptions handling
ExceptionsType = (TheTriple.isOSDarwin() && !TheTriple.isWatchABI())
? ExceptionHandling::SjLj
@@ -56,6 +59,9 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) {
SupportsDebugInformation = true;
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
+
// Exceptions handling
switch (TheTriple.getOS()) {
case Triple::NetBSD:
@@ -90,6 +96,9 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
CommentString = ";";
+
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
}
void ARMCOFFMCAsmInfoGNU::anchor() { }
@@ -110,5 +119,7 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
UseIntegratedAssembler = true;
DwarfRegNumForCFI = false;
-}
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
+}
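
The four MaxInstLength = 6 hunks above all encode the same bound: a conditional 32-bit Thumb-2 instruction may require the assembler to emit an implicit 2-byte IT in front of it, so the streamer has to budget 4 + 2 bytes per instruction. A trivial sketch of that arithmetic; the byte widths are assumptions of this illustration, not values read from an LLVM API.

#include <cstddef>

// Byte widths assumed for this illustration: a Thumb-2 instruction is at
// most 4 bytes, and an implicitly inserted IT instruction is 2 bytes.
constexpr std::size_t MaxThumb2Bytes = 4;
constexpr std::size_t ImplicitITBytes = 2;

// Matches the MaxInstLength = 6 chosen in the ARM MCAsmInfo variants above.
constexpr std::size_t WorstCaseInstLength = MaxThumb2Bytes + ImplicitITBytes;
static_assert(WorstCaseInstLength == 6,
              "a conditional 32-bit instruction plus its implicit IT");

int main() { return 0; }
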
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 4b4956e914f2..0ced8195790d 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -22,6 +22,8 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ScopedPrinter.h"
+
using namespace llvm;
namespace {
@@ -144,6 +146,15 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
MCValue Target,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+
+ if (FixupOffset & 0xff000000) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "can not encode offset '0x" +
+ to_hexString(FixupOffset) +
+ "' in resulting scattered relocation.");
+ return;
+ }
+
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Type = MachO::ARM_RELOC_HALF;
@@ -250,6 +261,15 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
unsigned Log2Size,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+
+ if (FixupOffset & 0xff000000) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "can not encode offset '0x" +
+ to_hexString(FixupOffset) +
+ "' in resulting scattered relocation.");
+ return;
+ }
+
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
// See <reloc.h>.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 63aa9735e8a4..91836cff95c8 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 8ae713b7b489..30cbde1ca71f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -75,8 +75,8 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(MCContext &Ctx,
case ARM::fixup_t2_condbranch:
return COFF::IMAGE_REL_ARM_BRANCH20T;
case ARM::fixup_t2_uncondbranch:
- return COFF::IMAGE_REL_ARM_BRANCH24T;
case ARM::fixup_arm_thumb_bl:
+ return COFF::IMAGE_REL_ARM_BRANCH24T;
case ARM::fixup_arm_thumb_blx:
return COFF::IMAGE_REL_ARM_BLX23T;
case ARM::fixup_t2_movw_lo16:
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 1a91a7030657..d567d3339049 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -146,9 +146,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
- if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
- RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
- RC == &ARM::GPRnopcRegClass) {
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::t2STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
@@ -190,9 +188,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
- if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
- RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
- RC == &ARM::GPRnopcRegClass) {
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
.addFrameIndex(FI)
.addImm(0)
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index abf54ba7e87c..65889fc4e28b 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -122,6 +122,7 @@ namespace {
{ ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
{ ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
{ ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2TEQrr, ARM::tEOR, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
{ ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
{ ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
{ ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
@@ -485,7 +486,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
.addReg(Rt, IsStore ? 0 : RegState::Define);
// Transfer memoperands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -605,7 +606,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -717,6 +718,16 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
return true;
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
+ case ARM::t2TEQrr: {
+ unsigned PredReg = 0;
+ // Can only convert to eors if we're not in an IT block.
+ if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
+ break;
+ // TODO if Operand 0 is not killed but Operand 1 is, then we could write
+ // to Op1 instead.
+ if (MI->getOperand(0).isKill())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ }
}
return false;
}
@@ -903,9 +914,24 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
- MIB.add(MI->getOperand(0));
- if (NewMCID.hasOptionalDef())
- MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+
+ // TEQ is special in that it doesn't define a register, but we're converting
+ // it into an EOR which does. So add the first operand as a def and then
+ // again as a use.
+ if (MCID.getOpcode() == ARM::t2TEQrr) {
+ MIB.add(MI->getOperand(0));
+ MIB->getOperand(0).setIsKill(false);
+ MIB->getOperand(0).setIsDef(true);
+ MIB->getOperand(0).setIsDead(true);
+
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+ MIB.add(MI->getOperand(0));
+ } else {
+ MIB.add(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+ }
// Transfer the rest of operands.
unsigned NumOps = MCID.getNumOperands();
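
The new t2TEQrr table entry and the ReduceSpecial/ReduceToNarrow handling above reduce a 32-bit TEQ to a 16-bit EORS when only the flags are consumed: TEQ writes no GPR, so the narrowed EORS reuses the first source register as a dead definition. A toy model of the legality check, with the predicate and register-kill state reduced to booleans purely for illustration; it is not the pass's API.

#include <iostream>

// Toy model of the legality check added for t2TEQrr above.
//   t2TEQrr rN, rM        ; 4 bytes, sets flags, writes no GPR
//   -> tEOR rN, rN, rM    ; 2 bytes, rN is written but immediately dead
bool canNarrowTEQ(bool InsideITBlock, bool Op0IsKilled) {
  // The flag-setting EORS form is not available inside an IT block, and rN
  // may only be clobbered if its old value is not needed afterwards.
  return !InsideITBlock && Op0IsKilled;
}

int main() {
  std::cout << canNarrowTEQ(false, true) << '\n';   // 1: reduce to tEOR
  std::cout << canNarrowTEQ(true, true) << '\n';    // 0: predicated, keep TEQ
  std::cout << canNarrowTEQ(false, false) << '\n';  // 0: rN is still live
}
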
diff --git a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index fec7080081d0..536a54759c77 100644
--- a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -568,8 +568,8 @@ bool AVRExpandPseudo::expand<AVR::LDSWRdK>(Block &MBB, BlockIt MBBI) {
llvm_unreachable("Unknown operand type!");
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -617,8 +617,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -648,8 +648,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPi>(Block &MBB, BlockIt MBBI) {
.addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
.addReg(SrcReg, RegState::Kill);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -679,8 +679,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPd>(Block &MBB, BlockIt MBBI) {
.addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
.addReg(SrcReg, RegState::Kill);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -734,8 +734,8 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -782,8 +782,8 @@ bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1003,8 +1003,8 @@ bool AVRExpandPseudo::expand<AVR::STSWKRr>(Block &MBB, BlockIt MBBI) {
MIBLO.addReg(SrcLoReg, getKillRegState(SrcIsKill));
MIBHI.addReg(SrcHiReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1031,8 +1031,8 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
.addImm(1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1065,8 +1065,8 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPiRr>(Block &MBB, BlockIt MBBI) {
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.addImm(Imm);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1099,8 +1099,8 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPdRr>(Block &MBB, BlockIt MBBI) {
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.addImm(Imm);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1133,8 +1133,8 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
.addImm(Imm + 1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1163,8 +1163,8 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addImm(Imm + 1);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1194,8 +1194,8 @@ bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
.addImm(Imm)
.addReg(SrcLoReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1251,24 +1251,26 @@ bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
- OpLo = AVR::LSLRd;
- OpHi = AVR::ROLRd;
+ OpLo = AVR::ADDRdRr; // ADD Rd, Rd <==> LSL Rd
+ OpHi = AVR::ADCRdRr; // ADC Rd, Rd <==> ROL Rd
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// Low part
buildMI(MBB, MBBI, OpLo)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg)
.addReg(DstLoReg, getKillRegState(DstIsKill));
auto MIBHI = buildMI(MBB, MBBI, OpHi)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg)
.addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
- MIBHI->getOperand(2).setIsDead();
+ MIBHI->getOperand(3).setIsDead();
// SREG is always implicitly killed
- MIBHI->getOperand(3).setIsKill();
+ MIBHI->getOperand(4).setIsKill();
MI.eraseFromParent();
return true;
@@ -1387,8 +1389,9 @@ template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
.addReg(SrcReg, getKillRegState(SrcIsKill));
}
- buildMI(MBB, MBBI, AVR::LSLRd)
+ buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rr
.addReg(DstHiReg, RegState::Define)
+ .addReg(DstHiReg)
.addReg(DstHiReg, RegState::Kill);
auto SBC = buildMI(MBB, MBBI, AVR::SBCRdRr)
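
The AVR hunks above drop the dedicated LSLRd/ROLRd definitions and expand the 16-bit shift pseudos with ADD/ADC instead, using the hardware identities LSL Rd == ADD Rd,Rd and ROL Rd == ADC Rd,Rd. A small arithmetic model of the LSLWRd expansion (plain integers, no LLVM types), showing how the carry out of the low ADD feeds the high ADC; it is a sketch of the semantics, not generated code.

#include <cstdint>
#include <cstdio>

// Model of the LSLWRd expansion above: the low byte does ADD r,r (== LSL r)
// and the high byte does ADC r,r (== ROL r), so the carry out of the low
// half becomes bit 0 of the high half.
uint16_t lslw(uint8_t Lo, uint8_t Hi) {
  unsigned SumLo = unsigned(Lo) + Lo;          // ADD lo, lo
  unsigned Carry = SumLo > 0xFF ? 1 : 0;
  unsigned SumHi = unsigned(Hi) + Hi + Carry;  // ADC hi, hi
  return uint16_t(((SumHi & 0xFF) << 8) | (SumLo & 0xFF));
}

int main() {
  uint16_t V = 0x81C3;
  uint16_t R = lslw(uint8_t(V & 0xFF), uint8_t(V >> 8));
  std::printf("0x%04X\n", unsigned(R));        // 0x0386 == (0x81C3 << 1) & 0xFFFF
}
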
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index b0b23effc6c6..85abf42eaa67 100644
--- a/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -350,9 +350,7 @@ template <> bool AVRDAGToDAGISel::select<ISD::STORE>(SDNode *N) {
SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, MVT::Other, Ops);
// Transfer memory operands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = ST->getMemOperand();
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {ST->getMemOperand()});
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
CurDAG->RemoveDeadNode(N);
@@ -407,9 +405,7 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
}
// Transfer memory operands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {LD->getMemOperand()});
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 1b412a9c6813..57fc978b54bb 100644
--- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -1430,6 +1430,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
MachineBasicBlock *BB) const {
unsigned Opc;
const TargetRegisterClass *RC;
+ bool HasRepeatedOperand = false;
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RI = F->getRegInfo();
const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
@@ -1440,8 +1441,9 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
default:
llvm_unreachable("Invalid shift opcode!");
case AVR::Lsl8:
- Opc = AVR::LSLRd;
+ Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd
RC = &AVR::GPR8RegClass;
+ HasRepeatedOperand = true;
break;
case AVR::Lsl16:
Opc = AVR::LSLWRd;
@@ -1464,8 +1466,9 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
RC = &AVR::DREGSRegClass;
break;
case AVR::Rol8:
- Opc = AVR::ROLRd;
+ Opc = AVR::ADCRdRr; // ROL is an alias of ADC Rd, Rd
RC = &AVR::GPR8RegClass;
+ HasRepeatedOperand = true;
break;
case AVR::Rol16:
Opc = AVR::ROLWRd;
@@ -1535,7 +1538,11 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
.addMBB(BB)
.addReg(ShiftAmtReg2)
.addMBB(LoopBB);
- BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
+
+ auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
+ if (HasRepeatedOperand)
+ ShiftMI.addReg(ShiftReg);
+
BuildMI(LoopBB, dl, TII.get(AVR::SUBIRdK), ShiftAmtReg2)
.addReg(ShiftAmtReg)
.addImm(1);
diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
index a2129cc0e2e9..5720af7d8df6 100644
--- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -730,15 +730,15 @@ Defs = [SREG] in
// TST Rd
// Test for zero or minus.
// This operation is identical to a `Rd AND Rd`.
-//def : InstAlias<"tst\t$rd", (ANDRdRr GPR8:$rd, GPR8:$rd), 1>;
+def : InstAlias<"tst\t$rd", (ANDRdRr GPR8:$rd, GPR8:$rd)>;
-let Defs = [SREG] in
-def TSTRd : FTST<0b0010,
- 0b00,
- (outs),
- (ins GPR8:$rd),
- "tst\t$rd",
- [(AVRtst i8:$rd)]>;
+// SBR Rd, K
+//
+// Mnemonic alias to 'ORI Rd, K'. Same bit pattern, same operands,
+// same everything.
+def : InstAlias<"sbr\t$rd, $k",
+ (ORIRdK LD8:$rd, imm_ldi8:$k),
+ /* Disable display, so we don't override ORI */ 0>;
//===----------------------------------------------------------------------===//
// Jump instructions
@@ -1222,7 +1222,7 @@ isReMaterializable = 1 in
// ldd Rd, P+q
// ldd Rd+1, P+q+1
let Constraints = "@earlyclobber $dst" in
- def LDDWRdPtrQ : Pseudo<(outs DREGS:$dst),
+ def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_Z_WORKAROUND:$dst),
(ins memri:$memri),
"lddw\t$dst, $memri",
[(set i16:$dst, (load addr:$memri))]>,
@@ -1632,12 +1632,7 @@ def LATZRd : FZRd<0b111,
let Constraints = "$src = $rd",
Defs = [SREG] in
{
- def LSLRd : FRdRr<0b0000,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "lsl\t$rd",
- [(set i8:$rd, (AVRlsl i8:$src)), (implicit SREG)]>;
+ // 8-bit LSL is an alias of ADD Rd, Rd
def LSLWRd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
@@ -1671,12 +1666,7 @@ Defs = [SREG] in
// Bit rotate operations.
let Uses = [SREG] in
{
- def ROLRd : FRdRr<0b0001,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "rol\t$rd",
- [(set i8:$rd, (AVRrol i8:$src)), (implicit SREG)]>;
+ // 8-bit ROL is an alias of ADC Rd, Rd
def ROLWRd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
@@ -1743,15 +1733,6 @@ def BLD : FRdB<0b00,
let Constraints = "$src = $rd",
Defs = [SREG] in
{
- // SBR Rd, K
- // Alias for ORI Rd, K
- def SBRRdK : FRdK<0b0110,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "sbr\t$rd, $k",
- [(set i8:$rd, (or i8:$src, imm:$k)),
- (implicit SREG)]>;
-
// CBR Rd, K
// Alias for `ANDI Rd, COM(K)` where COM(K) is the complement of K.
// FIXME: This uses the 'complement' encoder. We need it to also use the
@@ -1769,6 +1750,14 @@ Defs = [SREG] in
// Clears all bits in a register.
def CLR : InstAlias<"clr\t$rd", (EORRdRr GPR8:$rd, GPR8:$rd)>;
+// LSL Rd
+// Alias for ADD Rd, Rd
+// --------------
+// Logical shift left one bit.
+def LSL : InstAlias<"lsl\t$rd", (ADDRdRr GPR8:$rd, GPR8:$rd)>;
+
+def ROL : InstAlias<"rol\t$rd", (ADCRdRr GPR8:$rd, GPR8:$rd)>;
+
// SER Rd
// Alias for LDI Rd, 0xff
// ---------
@@ -2107,3 +2096,13 @@ def : Pat<(i8 (trunc (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr
def : Pat<(shl i16:$src1, (i8 1)),
(LSLWRd i16:$src1)>;
+// Lowering of 'tst' node to 'TST' instruction.
+// TST is an alias of AND Rd, Rd.
+def : Pat<(AVRtst i8:$rd),
+ (ANDRdRr GPR8:$rd, GPR8:$rd)>;
+
+// Lowering of 'lsl' node to 'LSL' instruction.
+// LSL is an alias of 'ADD Rd, Rd'
+def : Pat<(AVRlsl i8:$rd),
+ (ADDRdRr GPR8:$rd, GPR8:$rd)>;
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index d171a620760e..808a85e459c1 100644
--- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -152,6 +152,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.getOpcode() == AVR::FRMIDX) {
MI.setDesc(TII.get(AVR::MOVWRdRr));
MI.getOperand(FIOperandNum).ChangeToRegister(AVR::R29R28, false);
+ MI.RemoveOperand(2);
assert(Offset > 0 && "Invalid offset");
diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
index 8162f12052be..d55252bcac46 100644
--- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
+++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -157,6 +157,26 @@ def DREGS : RegisterClass<"AVR", [i16], 8,
R9R8, R7R6, R5R4, R3R2, R1R0
)>;
+// The 16-bit DREGS register class, excluding the Z pointer register.
+//
+// This is used by instructions that cause high pointer register
+// contention, which leads to an assertion in the register allocator.
+//
+// There is no technical reason why instructions that use this class
+// cannot use Z; it's simply a workaround for a regalloc bug.
+//
+// More information can be found in PR39553.
+def DREGS_WITHOUT_Z_WORKAROUND : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10,
+ R9R8, R7R6, R5R4, R3R2, R1R0
+ )>;
+
// 16-bit register class for immediate instructions.
def DLDREGS : RegisterClass<"AVR", [i16], 8,
(
diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 74300d9a451c..9828cdab68c3 100644
--- a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -40,12 +40,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return RM.hasValue() ? *RM : Reloc::Static;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -53,8 +47,8 @@ AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, AVRDataLayout, TT, getCPU(CPU), FS, Options,
- getEffectiveRelocModel(RM), getEffectiveCodeModel(CM),
- OL),
+ getEffectiveRelocModel(RM),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
SubTarget(TT, getCPU(CPU), FS, *this) {
this->TLOF = make_unique<AVRTargetObjectFile>();
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index d57cc098497f..f2bb59265271 100644
--- a/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -34,8 +34,9 @@
#define DEBUG_TYPE "avr-asm-parser"
-namespace llvm {
+using namespace llvm;
+namespace {
/// Parses AVR assembly from a stream.
class AVRAsmParser : public MCTargetAsmParser {
const MCSubtargetInfo &STI;
@@ -245,6 +246,8 @@ public:
}
};
+} // end anonymous namespace.
+
// Auto-generated Match Functions
/// Maps from the set of all register names to a register number.
@@ -510,6 +513,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands) {
case AsmToken::Real:
if (!tryParseExpression(Operands))
return false;
+ break;
default:
break;
}
@@ -708,5 +712,3 @@ unsigned AVRAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
}
return Match_InvalidOperand;
}
-
-} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 496f2befde58..8890fb8adf4d 100644
--- a/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -357,8 +357,8 @@ BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
case AsmToken::Plus: {
if (getLexer().peekTok().is(AsmToken::Integer))
return MatchOperand_NoMatch;
+ LLVM_FALLTHROUGH;
}
- // Fall through.
case AsmToken::Equal:
case AsmToken::Greater:
diff --git a/contrib/llvm/lib/Target/BPF/BPF.h b/contrib/llvm/lib/Target/BPF/BPF.h
index 76d3e1ca5f6f..9749e369c2c1 100644
--- a/contrib/llvm/lib/Target/BPF/BPF.h
+++ b/contrib/llvm/lib/Target/BPF/BPF.h
@@ -19,9 +19,11 @@ class BPFTargetMachine;
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMIPeepholePass();
FunctionPass *createBPFMIPreEmitPeepholePass();
+FunctionPass *createBPFMIPreEmitCheckingPass();
void initializeBPFMIPeepholePass(PassRegistry&);
void initializeBPFMIPreEmitPeepholePass(PassRegistry&);
+void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
}
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index 705211b486bf..ada5eb923f40 100644
--- a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "BPFInstrInfo.h"
#include "BPFMCInstLower.h"
#include "BPFTargetMachine.h"
+#include "BTFDebug.h"
#include "InstPrinter/BPFInstPrinter.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -40,6 +41,7 @@ public:
: AsmPrinter(TM, std::move(Streamer)) {}
StringRef getPassName() const override { return "BPF Assembly Printer"; }
+ bool doInitialization(Module &M) override;
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
@@ -52,6 +54,18 @@ public:
};
} // namespace
+bool BPFAsmPrinter::doInitialization(Module &M) {
+ AsmPrinter::doInitialization(M);
+
+ if (MAI->doesSupportDebugInformation()) {
+ Handlers.push_back(HandlerInfo(new BTFDebug(this), "emit",
+ "Debug Info Emission", "BTF",
+ "BTF Emission"));
+ }
+
+ return false;
+}
+
void BPFAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNum);
diff --git a/contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp b/contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp
new file mode 100644
index 000000000000..0a311378e777
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -0,0 +1,96 @@
+//===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs checking to signal errors for certain illegal usages at
+// the MachineInstruction layer. Specifically, the result of XADD{32,64} insns
+// should not be used. The pass runs at PreEmit, right before the machine code
+// is emitted, at which point the register liveness information is still
+// available.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFInstrInfo.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-mi-checking"
+
+namespace {
+
+struct BPFMIPreEmitChecking : public MachineFunctionPass {
+
+ static char ID;
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+
+ BPFMIPreEmitChecking() : MachineFunctionPass(ID) {
+ initializeBPFMIPreEmitCheckingPass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ void checkingIllegalXADD(void);
+
+public:
+
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!skipFunction(MF.getFunction())) {
+ initialize(MF);
+ checkingIllegalXADD();
+ }
+ return false;
+ }
+};
+
+// Initialize class variables.
+void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
+}
+
+void BPFMIPreEmitChecking::checkingIllegalXADD(void) {
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() != BPF::XADD32 && MI.getOpcode() != BPF::XADD64)
+ continue;
+
+ LLVM_DEBUG(MI.dump());
+ if (!MI.allDefsAreDead()) {
+ DebugLoc Empty;
+ const DebugLoc &DL = MI.getDebugLoc();
+ if (DL != Empty)
+ report_fatal_error("line " + std::to_string(DL.getLine()) +
+ ": Invalid usage of the XADD return value", false);
+ else
+ report_fatal_error("Invalid usage of the XADD return value", false);
+ }
+ }
+ }
+
+ return;
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(BPFMIPreEmitChecking, "bpf-mi-pemit-checking",
+ "BPF PreEmit Checking", false, false)
+
+char BPFMIPreEmitChecking::ID = 0;
+FunctionPass* llvm::createBPFMIPreEmitCheckingPass()
+{
+ return new BPFMIPreEmitChecking();
+}
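
The new pass rejects any XADD32/XADD64 whose definitions are still live, i.e. source code that consumes the return value of an atomic add, because the BPF xadd instruction does not return the old value. A hypothetical reproducer follows (the file name, compile command and symbol names are invented for illustration); bad_increment would trigger the "Invalid usage of the XADD return value" error when built for the bpf target, while good_increment would not.

// Hypothetical reproducer (invented file and symbol names), e.g. built with
// something like: clang -O2 -g -target bpf -c xadd_repro.c
long counter;

long bad_increment(void) {
  // Lowers to XADD64 with a live definition: the BPF ISA has no "return old
  // value" form, so the new pre-emit check reports a fatal error here.
  return __sync_fetch_and_add(&counter, 1);
}

void good_increment(void) {
  // Result discarded, so every definition of the XADD is dead: accepted.
  (void)__sync_fetch_and_add(&counter, 1);
}
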
diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
index bb0d6bcf5450..4202850e9eb9 100644
--- a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
+++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
@@ -29,8 +29,6 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 84d89bff74fe..350465b118ed 100644
--- a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -51,12 +51,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -64,13 +58,14 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM), getEffectiveCodeModel(CM),
- OL),
+ getEffectiveRelocModel(RM),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
- BPFMCAsmInfo *MAI = static_cast<BPFMCAsmInfo *>(const_cast<MCAsmInfo *>(AsmInfo));
+ BPFMCAsmInfo *MAI =
+ static_cast<BPFMCAsmInfo *>(const_cast<MCAsmInfo *>(AsmInfo.get()));
MAI->setDwarfUsesRelocationsAcrossSections(!Subtarget.getUseDwarfRIS());
}
namespace {
@@ -115,6 +110,7 @@ void BPFPassConfig::addMachineSSAOptimization() {
void BPFPassConfig::addPreEmitPass() {
const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl();
+ addPass(createBPFMIPreEmitCheckingPass());
if (getOptLevel() != CodeGenOpt::None)
if (Subtarget->getHasAlu32() && !DisableMIPeephole)
addPass(createBPFMIPreEmitPeepholePass());
diff --git a/contrib/llvm/lib/Target/BPF/BTF.def b/contrib/llvm/lib/Target/BPF/BTF.def
new file mode 100644
index 000000000000..54c5bc3cf092
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTF.def
@@ -0,0 +1,33 @@
+//===- BTF.def - BTF definitions --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Macros for BTF.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(HANDLE_BTF_KIND)
+#error "Missing macro definition of HANDLE_BTF_*"
+#endif
+
+HANDLE_BTF_KIND(0, UNKN)
+HANDLE_BTF_KIND(1, INT)
+HANDLE_BTF_KIND(2, PTR)
+HANDLE_BTF_KIND(3, ARRAY)
+HANDLE_BTF_KIND(4, STRUCT)
+HANDLE_BTF_KIND(5, UNION)
+HANDLE_BTF_KIND(6, ENUM)
+HANDLE_BTF_KIND(7, FWD)
+HANDLE_BTF_KIND(8, TYPEDEF)
+HANDLE_BTF_KIND(9, VOLATILE)
+HANDLE_BTF_KIND(10, CONST)
+HANDLE_BTF_KIND(11, RESTRICT)
+HANDLE_BTF_KIND(12, FUNC)
+HANDLE_BTF_KIND(13, FUNC_PROTO)
+
+#undef HANDLE_BTF_KIND
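
BTF.def is consumed through the X-macro idiom: BTF.h expands each HANDLE_BTF_KIND into an enumerator and BTFDebug.cpp expands it into a printable name (both expansions appear later in this diff). A self-contained miniature of the same idiom; the kind list is inlined here only because the sketch cannot include the new file.

#include <cstdio>

// Inline stand-in for BTF.def (the real file lists 14 kinds); defined here
// only so the sketch compiles on its own.
#define MINI_BTF_KINDS(X) \
  X(0, UNKN)              \
  X(1, INT)               \
  X(2, PTR)

// Expansion #1: an enum, as in BTF.h's TypeKinds.
enum MiniTypeKinds : unsigned char {
#define HANDLE_BTF_KIND(ID, NAME) BTF_KIND_##NAME = ID,
  MINI_BTF_KINDS(HANDLE_BTF_KIND)
#undef HANDLE_BTF_KIND
};

// Expansion #2: a name table, as in BTFDebug.cpp's BTFKindStr.
static const char *MiniKindStr[] = {
#define HANDLE_BTF_KIND(ID, NAME) "BTF_KIND_" #NAME,
  MINI_BTF_KINDS(HANDLE_BTF_KIND)
#undef HANDLE_BTF_KIND
};

int main() {
  std::printf("%d -> %s\n", int(BTF_KIND_PTR), MiniKindStr[BTF_KIND_PTR]);
}
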
diff --git a/contrib/llvm/lib/Target/BPF/BTF.h b/contrib/llvm/lib/Target/BPF/BTF.h
new file mode 100644
index 000000000000..1e1680faf1b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTF.h
@@ -0,0 +1,209 @@
+//===-- BTF.h --------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the layout of .BTF and .BTF.ext ELF sections.
+///
+/// The binary layout for .BTF section:
+/// struct Header
+/// Type and Str subsections
+/// The Type subsection is a collection of types with type id starting with 1.
+/// The Str subsection is simply a collection of strings.
+///
+/// The binary layout for .BTF.ext section:
+/// struct ExtHeader
+/// FuncInfo and LineInfo subsections
+/// The FuncInfo subsection is defined as below:
+/// BPFFuncInfo Size
+/// struct SecFuncInfo for ELF section #1
+/// A number of struct BPFFuncInfo for ELF section #1
+/// struct SecFuncInfo for ELF section #2
+/// A number of struct BPFFuncInfo for ELF section #2
+/// ...
+/// The LineInfo subsection is defined as below:
+/// BPFLineInfo Size
+/// struct SecLineInfo for ELF section #1
+/// A number of struct BPFLineInfo for ELF section #1
+/// struct SecLineInfo for ELF section #2
+/// A number of struct BPFLineInfo for ELF section #2
+/// ...
+///
+/// The section formats are also defined at
+/// https://github.com/torvalds/linux/blob/master/include/uapi/linux/btf.h
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BTF_H
+#define LLVM_LIB_TARGET_BPF_BTF_H
+
+namespace llvm {
+namespace BTF {
+
+enum : uint32_t { MAGIC = 0xeB9F, VERSION = 1 };
+
+/// Sizes in bytes of various things in the BTF format.
+enum {
+ HeaderSize = 24,
+ ExtHeaderSize = 24,
+ CommonTypeSize = 12,
+ BTFArraySize = 12,
+ BTFEnumSize = 8,
+ BTFMemberSize = 12,
+ BTFParamSize = 8,
+ SecFuncInfoSize = 8,
+ SecLineInfoSize = 8,
+ BPFFuncInfoSize = 8,
+ BPFLineInfoSize = 16
+};
+
+/// The .BTF section header definition.
+struct Header {
+ uint16_t Magic; ///< Magic value
+ uint8_t Version; ///< Version number
+ uint8_t Flags; ///< Extra flags
+ uint32_t HdrLen; ///< Length of this header
+
+ /// All offsets are in bytes relative to the end of this header.
+ uint32_t TypeOff; ///< Offset of type section
+ uint32_t TypeLen; ///< Length of type section
+ uint32_t StrOff; ///< Offset of string section
+ uint32_t StrLen; ///< Length of string section
+};
+
+enum : uint32_t {
+ MAX_VLEN = 0xffff ///< Max # of struct/union/enum members or func args
+};
+
+enum TypeKinds : uint8_t {
+#define HANDLE_BTF_KIND(ID, NAME) BTF_KIND_##NAME = ID,
+#include "BTF.def"
+};
+
+/// The BTF common type definition. Different kinds may have
+/// additional information after this structure data.
+struct CommonType {
+ /// Type name offset in the string table.
+ uint32_t NameOff;
+
+ /// "Info" bits arrangement:
+ /// Bits 0-15: vlen (e.g. # of struct's members)
+ /// Bits 16-23: unused
+ /// Bits 24-27: kind (e.g. int, ptr, array...etc)
+ /// Bits 28-30: unused
+ /// Bit 31: kind_flag, currently used by
+ /// struct, union and fwd
+ uint32_t Info;
+
+ /// "Size" is used by INT, ENUM, STRUCT and UNION.
+ /// "Size" tells the size of the type it is describing.
+ ///
+ /// "Type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+ /// FUNC and FUNC_PROTO.
+ /// "Type" is a type_id referring to another type.
+ union {
+ uint32_t Size;
+ uint32_t Type;
+ };
+};
+
+// For some specific BTF_KIND, "struct CommonType" is immediately
+// followed by extra data.
+
+// BTF_KIND_INT is followed by a u32 and the following
+// is the 32 bits arrangement:
+// BTF_INT_ENCODING(VAL) : (((VAL) & 0x0f000000) >> 24)
+// BTF_INT_OFFSET(VAL) : (((VAL & 0x00ff0000)) >> 16)
+// BTF_INT_BITS(VAL) : ((VAL) & 0x000000ff)
+
+/// Attributes stored in the INT_ENCODING.
+enum : uint8_t { INT_SIGNED = (1 << 0), INT_CHAR = (1 << 1), INT_BOOL = (1 << 2) };
+
+/// BTF_KIND_ENUM is followed by multiple "struct BTFEnum".
+/// The exact number of btf_enum is stored in the vlen (of the
+/// info in "struct CommonType").
+struct BTFEnum {
+ uint32_t NameOff; ///< Enum name offset in the string table
+ int32_t Val; ///< Enum member value
+};
+
+/// BTF_KIND_ARRAY is followed by one "struct BTFArray".
+struct BTFArray {
+ uint32_t ElemType; ///< Element type
+ uint32_t IndexType; ///< Index type
+ uint32_t Nelems; ///< Number of elements for this array
+};
+
+/// BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+/// by multiple "struct BTFMember". The exact number
+/// of BTFMember is stored in the vlen (of the info in
+/// "struct CommonType").
+///
+/// If the struct/union contains any bitfield member,
+/// the Offset below represents BitOffset (bits 0 - 23)
+/// and BitFieldSize(bits 24 - 31) with BitFieldSize = 0
+/// for non bitfield members. Otherwise, the Offset
+/// represents the BitOffset.
+struct BTFMember {
+ uint32_t NameOff; ///< Member name offset in the string table
+ uint32_t Type; ///< Member type
+ uint32_t Offset; ///< BitOffset or BitFieldSize+BitOffset
+};
+
+/// BTF_KIND_FUNC_PROTO is followed by multiple "struct BTFParam".
+/// The exact number of BTFParam is stored in the vlen (of the info
+/// in "struct CommonType").
+struct BTFParam {
+ uint32_t NameOff;
+ uint32_t Type;
+};
+
+/// The .BTF.ext section header definition.
+struct ExtHeader {
+ uint16_t Magic;
+ uint8_t Version;
+ uint8_t Flags;
+ uint32_t HdrLen;
+
+ uint32_t FuncInfoOff; ///< Offset of func info section
+ uint32_t FuncInfoLen; ///< Length of func info section
+ uint32_t LineInfoOff; ///< Offset of line info section
+ uint32_t LineInfoLen; ///< Length of line info section
+};
+
+/// Specifying one function info.
+struct BPFFuncInfo {
+ uint32_t InsnOffset; ///< Byte offset in the section
+ uint32_t TypeId; ///< Type id referring to .BTF type section
+};
+
+/// Specifying function info's in one section.
+struct SecFuncInfo {
+ uint32_t SecNameOff; ///< Section name index in the .BTF string table
+ uint32_t NumFuncInfo; ///< Number of func info's in this section
+};
+
+/// Specifying one line info.
+struct BPFLineInfo {
+ uint32_t InsnOffset; ///< Byte offset in this section
+ uint32_t FileNameOff; ///< File name index in the .BTF string table
+ uint32_t LineOff; ///< Line index in the .BTF string table
+ uint32_t LineCol; ///< Line num: line_col >> 10,
+ /// col num: line_col & 0x3ff
+};
+
+/// Specifying line info's in one section.
+struct SecLineInfo {
+ uint32_t SecNameOff; ///< Section name index in the .BTF string table
+ uint32_t NumLineInfo; ///< Number of line info's in this section
+};
+
+} // End namespace BTF.
+} // End namespace llvm.
+
+#endif
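
BTF.h documents the bit layout of CommonType::Info (vlen in bits 0-15, kind in bits 24-27, kind_flag in bit 31) and of the u32 that follows a BTF_KIND_INT entry. A small reader-side sketch of those decodings; the field widths come from the comments above, while the helper names are invented for this illustration and are not LLVM API.

#include <cstdint>
#include <cstdio>

// Decoders matching the bit layouts documented in BTF.h above; the helper
// names are invented for this sketch.
uint16_t infoVLen(uint32_t Info)     { return Info & 0xffff; }       // bits 0-15
uint8_t  infoKind(uint32_t Info)     { return (Info >> 24) & 0x0f; } // bits 24-27
bool     infoKindFlag(uint32_t Info) { return (Info >> 31) & 0x1; }  // bit 31

// The u32 that follows a BTF_KIND_INT entry.
uint8_t intEncoding(uint32_t V) { return (V & 0x0f000000) >> 24; }
uint8_t intOffset(uint32_t V)   { return (V & 0x00ff0000) >> 16; }
uint8_t intBits(uint32_t V)     { return V & 0x000000ff; }

int main() {
  // What BTFTypeInt would produce for a plain signed 32-bit "int":
  // kind 1 (INT), INT_SIGNED encoding, offset 0, 32 bits.
  uint32_t Info = 1u << 24;
  uint32_t IntVal = (1u << 24) | (0u << 16) | 32u;
  std::printf("kind=%u vlen=%u signed=%u bits=%u\n",
              unsigned(infoKind(Info)), unsigned(infoVLen(Info)),
              unsigned(intEncoding(IntVal) & 1u), unsigned(intBits(IntVal)));
}
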
diff --git a/contrib/llvm/lib/Target/BPF/BTFDebug.cpp b/contrib/llvm/lib/Target/BPF/BTFDebug.cpp
new file mode 100644
index 000000000000..96efea4ba8ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -0,0 +1,759 @@
+//===- BTFDebug.cpp - BTF Generator ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing BTF debug info.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BTFDebug.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include <fstream>
+#include <sstream>
+
+using namespace llvm;
+
+static const char *BTFKindStr[] = {
+#define HANDLE_BTF_KIND(ID, NAME) "BTF_KIND_" #NAME,
+#include "BTF.def"
+};
+
+/// Emit a BTF common type.
+void BTFTypeBase::emitType(MCStreamer &OS) {
+ OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) +
+ ")");
+ OS.EmitIntValue(BTFType.NameOff, 4);
+ OS.AddComment("0x" + Twine::utohexstr(BTFType.Info));
+ OS.EmitIntValue(BTFType.Info, 4);
+ OS.EmitIntValue(BTFType.Size, 4);
+}
+
+BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag)
+ : DTy(DTy) {
+ switch (Tag) {
+ case dwarf::DW_TAG_pointer_type:
+ Kind = BTF::BTF_KIND_PTR;
+ break;
+ case dwarf::DW_TAG_const_type:
+ Kind = BTF::BTF_KIND_CONST;
+ break;
+ case dwarf::DW_TAG_volatile_type:
+ Kind = BTF::BTF_KIND_VOLATILE;
+ break;
+ case dwarf::DW_TAG_typedef:
+ Kind = BTF::BTF_KIND_TYPEDEF;
+ break;
+ case dwarf::DW_TAG_restrict_type:
+ Kind = BTF::BTF_KIND_RESTRICT;
+ break;
+ default:
+ llvm_unreachable("Unknown DIDerivedType Tag");
+ }
+ BTFType.Info = Kind << 24;
+}
+
+void BTFTypeDerived::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(DTy->getName());
+
+ // The base type for PTR/CONST/VOLATILE could be void.
+ const DIType *ResolvedType = DTy->getBaseType().resolve();
+ if (!ResolvedType) {
+ assert((Kind == BTF::BTF_KIND_PTR || Kind == BTF::BTF_KIND_CONST ||
+ Kind == BTF::BTF_KIND_VOLATILE) &&
+ "Invalid null basetype");
+ BTFType.Type = 0;
+ } else {
+ BTFType.Type = BDebug.getTypeId(ResolvedType);
+ }
+}
+
+void BTFTypeDerived::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+
+/// Represent a struct/union forward declaration.
+BTFTypeFwd::BTFTypeFwd(StringRef Name, bool IsUnion) : Name(Name) {
+ Kind = BTF::BTF_KIND_FWD;
+ BTFType.Info = IsUnion << 31 | Kind << 24;
+ BTFType.Type = 0;
+}
+
+void BTFTypeFwd::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFTypeFwd::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+
+BTFTypeInt::BTFTypeInt(uint32_t Encoding, uint32_t SizeInBits,
+ uint32_t OffsetInBits, StringRef TypeName)
+ : Name(TypeName) {
+ // Translate IR int encoding to BTF int encoding.
+ uint8_t BTFEncoding;
+ switch (Encoding) {
+ case dwarf::DW_ATE_boolean:
+ BTFEncoding = BTF::INT_BOOL;
+ break;
+ case dwarf::DW_ATE_signed:
+ case dwarf::DW_ATE_signed_char:
+ BTFEncoding = BTF::INT_SIGNED;
+ break;
+ case dwarf::DW_ATE_unsigned:
+ case dwarf::DW_ATE_unsigned_char:
+ BTFEncoding = 0;
+ break;
+ default:
+ llvm_unreachable("Unknown BTFTypeInt Encoding");
+ }
+
+ Kind = BTF::BTF_KIND_INT;
+ BTFType.Info = Kind << 24;
+ BTFType.Size = roundupToBytes(SizeInBits);
+ IntVal = (BTFEncoding << 24) | OffsetInBits << 16 | SizeInBits;
+}
+
+void BTFTypeInt::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFTypeInt::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ OS.AddComment("0x" + Twine::utohexstr(IntVal));
+ OS.EmitIntValue(IntVal, 4);
+}
+
+BTFTypeEnum::BTFTypeEnum(const DICompositeType *ETy, uint32_t VLen) : ETy(ETy) {
+ Kind = BTF::BTF_KIND_ENUM;
+ BTFType.Info = Kind << 24 | VLen;
+ BTFType.Size = roundupToBytes(ETy->getSizeInBits());
+}
+
+void BTFTypeEnum::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(ETy->getName());
+
+ DINodeArray Elements = ETy->getElements();
+ for (const auto Element : Elements) {
+ const auto *Enum = cast<DIEnumerator>(Element);
+
+ struct BTF::BTFEnum BTFEnum;
+ BTFEnum.NameOff = BDebug.addString(Enum->getName());
+ // BTF enum value is 32bit, enforce it.
+ BTFEnum.Val = static_cast<uint32_t>(Enum->getValue());
+ EnumValues.push_back(BTFEnum);
+ }
+}
+
+void BTFTypeEnum::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ for (const auto &Enum : EnumValues) {
+ OS.EmitIntValue(Enum.NameOff, 4);
+ OS.EmitIntValue(Enum.Val, 4);
+ }
+}
+
+BTFTypeArray::BTFTypeArray(const DICompositeType *ATy) : ATy(ATy) {
+ Kind = BTF::BTF_KIND_ARRAY;
+ BTFType.Info = Kind << 24;
+}
+
+/// Represent a BTF array. BTF does not record array dimensions,
+/// so conceptually a BTF array is a one-dimensional array.
+void BTFTypeArray::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(ATy->getName());
+ BTFType.Size = 0;
+
+ auto *BaseType = ATy->getBaseType().resolve();
+ ArrayInfo.ElemType = BDebug.getTypeId(BaseType);
+
+ // The IR does not really have a type for the index.
+ // A special type for array index should have been
+ // created during initial type traversal. Just
+ // retrieve that type id.
+ ArrayInfo.IndexType = BDebug.getArrayIndexTypeId();
+
+ // Get the number of array elements.
+ // If the array size is 0, set the number of elements as 0.
+ // Otherwise, recursively traverse the base types to
+ // find the element size. The number of elements is
+ // the total array size in bits divided by
+ // element size in bits.
+ uint64_t ArraySizeInBits = ATy->getSizeInBits();
+ if (!ArraySizeInBits) {
+ ArrayInfo.Nelems = 0;
+ } else {
+ uint32_t BaseTypeSize = BaseType->getSizeInBits();
+ while (!BaseTypeSize) {
+ const auto *DDTy = cast<DIDerivedType>(BaseType);
+ BaseType = DDTy->getBaseType().resolve();
+ assert(BaseType);
+ BaseTypeSize = BaseType->getSizeInBits();
+ }
+ ArrayInfo.Nelems = ATy->getSizeInBits() / BaseTypeSize;
+ }
+}
+
+void BTFTypeArray::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ OS.EmitIntValue(ArrayInfo.ElemType, 4);
+ OS.EmitIntValue(ArrayInfo.IndexType, 4);
+ OS.EmitIntValue(ArrayInfo.Nelems, 4);
+}
+
+/// Represent either a struct or a union.
+BTFTypeStruct::BTFTypeStruct(const DICompositeType *STy, bool IsStruct,
+ bool HasBitField, uint32_t Vlen)
+ : STy(STy), HasBitField(HasBitField) {
+ Kind = IsStruct ? BTF::BTF_KIND_STRUCT : BTF::BTF_KIND_UNION;
+ BTFType.Size = roundupToBytes(STy->getSizeInBits());
+ BTFType.Info = (HasBitField << 31) | (Kind << 24) | Vlen;
+}
+
+void BTFTypeStruct::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(STy->getName());
+
+ // Add struct/union members.
+ const DINodeArray Elements = STy->getElements();
+ for (const auto *Element : Elements) {
+ struct BTF::BTFMember BTFMember;
+ const auto *DDTy = cast<DIDerivedType>(Element);
+
+ BTFMember.NameOff = BDebug.addString(DDTy->getName());
+ if (HasBitField) {
+ uint8_t BitFieldSize = DDTy->isBitField() ? DDTy->getSizeInBits() : 0;
+ BTFMember.Offset = BitFieldSize << 24 | DDTy->getOffsetInBits();
+ } else {
+ BTFMember.Offset = DDTy->getOffsetInBits();
+ }
+ BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType().resolve());
+ Members.push_back(BTFMember);
+ }
+}
+
+void BTFTypeStruct::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ for (const auto &Member : Members) {
+ OS.EmitIntValue(Member.NameOff, 4);
+ OS.EmitIntValue(Member.Type, 4);
+ OS.AddComment("0x" + Twine::utohexstr(Member.Offset));
+ OS.EmitIntValue(Member.Offset, 4);
+ }
+}
+
+/// The Func kind represents both subprogram and pointee of function
+/// pointers. If the FuncName is empty, it represents a pointee of function
+/// pointer. Otherwise, it represents a subprogram. The func arg names
+/// are empty for pointee of function pointer case, and are valid names
+/// for subprogram.
+BTFTypeFuncProto::BTFTypeFuncProto(
+ const DISubroutineType *STy, uint32_t VLen,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames)
+ : STy(STy), FuncArgNames(FuncArgNames) {
+ Kind = BTF::BTF_KIND_FUNC_PROTO;
+ BTFType.Info = (Kind << 24) | VLen;
+}
+
+void BTFTypeFuncProto::completeType(BTFDebug &BDebug) {
+ DITypeRefArray Elements = STy->getTypeArray();
+ auto RetType = Elements[0].resolve();
+ BTFType.Type = RetType ? BDebug.getTypeId(RetType) : 0;
+ BTFType.NameOff = 0;
+
+ // For a null parameter, which is typically the last one and
+ // represents the vararg, encode the NameOff/Type as 0.
+ for (unsigned I = 1, N = Elements.size(); I < N; ++I) {
+ struct BTF::BTFParam Param;
+ auto Element = Elements[I].resolve();
+ if (Element) {
+ Param.NameOff = BDebug.addString(FuncArgNames[I]);
+ Param.Type = BDebug.getTypeId(Element);
+ } else {
+ Param.NameOff = 0;
+ Param.Type = 0;
+ }
+ Parameters.push_back(Param);
+ }
+}
+
+void BTFTypeFuncProto::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ for (const auto &Param : Parameters) {
+ OS.EmitIntValue(Param.NameOff, 4);
+ OS.EmitIntValue(Param.Type, 4);
+ }
+}
+
+BTFTypeFunc::BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId)
+ : Name(FuncName) {
+ Kind = BTF::BTF_KIND_FUNC;
+ BTFType.Info = Kind << 24;
+ BTFType.Type = ProtoTypeId;
+}
+
+void BTFTypeFunc::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFTypeFunc::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+
+uint32_t BTFStringTable::addString(StringRef S) {
+ // Check whether the string already exists.
+ for (auto &OffsetM : OffsetToIdMap) {
+ if (Table[OffsetM.second] == S)
+ return OffsetM.first;
+ }
+ // Not found; add it to the string table.
+ uint32_t Offset = Size;
+ OffsetToIdMap[Offset] = Table.size();
+ Table.push_back(S);
+ Size += S.size() + 1;
+ return Offset;
+}
+
+BTFDebug::BTFDebug(AsmPrinter *AP)
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), SkipInstruction(false),
+ LineInfoGenerated(false), SecNameOff(0), ArrayIndexTypeId(0) {
+ addString("\0");
+}
+
+void BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry,
+ const DIType *Ty) {
+ TypeEntry->setId(TypeEntries.size() + 1);
+ DIToIdMap[Ty] = TypeEntry->getId();
+ TypeEntries.push_back(std::move(TypeEntry));
+}
+
+uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
+ TypeEntry->setId(TypeEntries.size() + 1);
+ uint32_t Id = TypeEntry->getId();
+ TypeEntries.push_back(std::move(TypeEntry));
+ return Id;
+}
+
+void BTFDebug::visitBasicType(const DIBasicType *BTy) {
+ // Only int types are supported in BTF.
+ uint32_t Encoding = BTy->getEncoding();
+ if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed &&
+ Encoding != dwarf::DW_ATE_signed_char &&
+ Encoding != dwarf::DW_ATE_unsigned &&
+ Encoding != dwarf::DW_ATE_unsigned_char)
+ return;
+
+ // Create a BTF type instance for this DIBasicType and put it into
+ // DIToIdMap for cross-type reference check.
+ auto TypeEntry = llvm::make_unique<BTFTypeInt>(
+ Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
+ addType(std::move(TypeEntry), BTy);
+}
+
+/// Handle subprogram or subroutine types.
+void BTFDebug::visitSubroutineType(
+ const DISubroutineType *STy, bool ForSubprog,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames,
+ uint32_t &TypeId) {
+ DITypeRefArray Elements = STy->getTypeArray();
+ uint32_t VLen = Elements.size() - 1;
+ if (VLen > BTF::MAX_VLEN)
+ return;
+
+ // Subprogram has a valid non-zero-length name, and the pointee of
+ // a function pointer has an empty name. The subprogram type will
+ // not be added to DIToIdMap as it should not be referenced by
+ // any other types.
+ auto TypeEntry = llvm::make_unique<BTFTypeFuncProto>(STy, VLen, FuncArgNames);
+ if (ForSubprog)
+ TypeId = addType(std::move(TypeEntry)); // For subprogram
+ else
+ addType(std::move(TypeEntry), STy); // For func ptr
+
+ // Visit return type and func arg types.
+ for (const auto Element : Elements) {
+ visitTypeEntry(Element.resolve());
+ }
+}
+
+/// Handle structure/union types.
+void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct) {
+ const DINodeArray Elements = CTy->getElements();
+ uint32_t VLen = Elements.size();
+ if (VLen > BTF::MAX_VLEN)
+ return;
+
+ // Check whether we have any bitfield members or not
+ bool HasBitField = false;
+ for (const auto *Element : Elements) {
+ auto E = cast<DIDerivedType>(Element);
+ if (E->isBitField()) {
+ HasBitField = true;
+ break;
+ }
+ }
+
+ auto TypeEntry =
+ llvm::make_unique<BTFTypeStruct>(CTy, IsStruct, HasBitField, VLen);
+ addType(std::move(TypeEntry), CTy);
+
+ // Visit all struct members.
+ for (const auto *Element : Elements)
+ visitTypeEntry(cast<DIDerivedType>(Element));
+}
+
+void BTFDebug::visitArrayType(const DICompositeType *CTy) {
+ auto TypeEntry = llvm::make_unique<BTFTypeArray>(CTy);
+ addType(std::move(TypeEntry), CTy);
+
+ // The IR does not have a type for array index while BTF wants one.
+ // So create an array index type if there is none.
+ if (!ArrayIndexTypeId) {
+ auto TypeEntry = llvm::make_unique<BTFTypeInt>(dwarf::DW_ATE_unsigned, 32,
+ 0, "__ARRAY_SIZE_TYPE__");
+ ArrayIndexTypeId = addType(std::move(TypeEntry));
+ }
+
+ // Visit array element type.
+ visitTypeEntry(CTy->getBaseType().resolve());
+}
+
+void BTFDebug::visitEnumType(const DICompositeType *CTy) {
+ DINodeArray Elements = CTy->getElements();
+ uint32_t VLen = Elements.size();
+ if (VLen > BTF::MAX_VLEN)
+ return;
+
+ auto TypeEntry = llvm::make_unique<BTFTypeEnum>(CTy, VLen);
+ addType(std::move(TypeEntry), CTy);
+ // No need to visit base type as BTF does not encode it.
+}
+
+/// Handle structure/union forward declarations.
+void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion) {
+ auto TypeEntry = llvm::make_unique<BTFTypeFwd>(CTy->getName(), IsUnion);
+ addType(std::move(TypeEntry), CTy);
+}
+
+/// Handle structure, union, array and enumeration types.
+void BTFDebug::visitCompositeType(const DICompositeType *CTy) {
+ auto Tag = CTy->getTag();
+ if (Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
+ // Handle forward declaration differently as it does not have members.
+ if (CTy->isForwardDecl())
+ visitFwdDeclType(CTy, Tag == dwarf::DW_TAG_union_type);
+ else
+ visitStructType(CTy, Tag == dwarf::DW_TAG_structure_type);
+ } else if (Tag == dwarf::DW_TAG_array_type)
+ visitArrayType(CTy);
+ else if (Tag == dwarf::DW_TAG_enumeration_type)
+ visitEnumType(CTy);
+}
+
+/// Handle pointer, typedef, const, volatile, restrict and member types.
+void BTFDebug::visitDerivedType(const DIDerivedType *DTy) {
+ unsigned Tag = DTy->getTag();
+
+ if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef ||
+ Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
+ auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag);
+ addType(std::move(TypeEntry), DTy);
+ } else if (Tag != dwarf::DW_TAG_member) {
+ return;
+ }
+
+ // Visit base type of pointer, typedef, const, volatile, restrict or
+ // struct/union member.
+ visitTypeEntry(DTy->getBaseType().resolve());
+}
+
+void BTFDebug::visitTypeEntry(const DIType *Ty) {
+ if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end())
+ return;
+
+ uint32_t TypeId;
+ if (const auto *BTy = dyn_cast<DIBasicType>(Ty))
+ visitBasicType(BTy);
+ else if (const auto *STy = dyn_cast<DISubroutineType>(Ty))
+ visitSubroutineType(STy, false, std::unordered_map<uint32_t, StringRef>(),
+ TypeId);
+ else if (const auto *CTy = dyn_cast<DICompositeType>(Ty))
+ visitCompositeType(CTy);
+ else if (const auto *DTy = dyn_cast<DIDerivedType>(Ty))
+ visitDerivedType(DTy);
+ else
+ llvm_unreachable("Unknown DIType");
+}
+
+/// Read file contents from the actual file or from the embedded source.
+std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
+ auto File = SP->getFile();
+ std::string FileName;
+
+ if (File->getDirectory().size())
+ FileName = File->getDirectory().str() + "/" + File->getFilename().str();
+ else
+ FileName = File->getFilename();
+
+ // No need to populate the contents if they have already been populated.
+ if (FileContent.find(FileName) != FileContent.end())
+ return FileName;
+
+ std::vector<std::string> Content;
+ std::string Line;
+ Content.push_back(Line); // Line 0 for empty string
+
+ auto Source = File->getSource();
+ if (Source) {
+ std::istringstream InputString(Source.getValue());
+ while (std::getline(InputString, Line))
+ Content.push_back(Line);
+ } else {
+ std::ifstream InputFile(FileName);
+ while (std::getline(InputFile, Line))
+ Content.push_back(Line);
+ }
+
+ FileContent[FileName] = Content;
+ return FileName;
+}
+
+void BTFDebug::constructLineInfo(const DISubprogram *SP, MCSymbol *Label,
+ uint32_t Line, uint32_t Column) {
+ std::string FileName = populateFileContent(SP);
+ BTFLineInfo LineInfo;
+
+ LineInfo.Label = Label;
+ LineInfo.FileNameOff = addString(FileName);
+ // If file content is not available, let LineOff = 0.
+ if (Line < FileContent[FileName].size())
+ LineInfo.LineOff = addString(FileContent[FileName][Line]);
+ else
+ LineInfo.LineOff = 0;
+ LineInfo.LineNum = Line;
+ LineInfo.ColumnNum = Column;
+ LineInfoTable[SecNameOff].push_back(LineInfo);
+}
+
+void BTFDebug::emitCommonHeader() {
+ OS.AddComment("0x" + Twine::utohexstr(BTF::MAGIC));
+ OS.EmitIntValue(BTF::MAGIC, 2);
+ OS.EmitIntValue(BTF::VERSION, 1);
+ OS.EmitIntValue(0, 1);
+}
+
+void BTFDebug::emitBTFSection() {
+ MCContext &Ctx = OS.getContext();
+ OS.SwitchSection(Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0));
+
+ // Emit header.
+ emitCommonHeader();
+ OS.EmitIntValue(BTF::HeaderSize, 4);
+
+ uint32_t TypeLen = 0, StrLen;
+ for (const auto &TypeEntry : TypeEntries)
+ TypeLen += TypeEntry->getSize();
+ StrLen = StringTable.getSize();
+
+ OS.EmitIntValue(0, 4);
+ OS.EmitIntValue(TypeLen, 4);
+ OS.EmitIntValue(TypeLen, 4);
+ OS.EmitIntValue(StrLen, 4);
+
+ // Emit type table.
+ for (const auto &TypeEntry : TypeEntries)
+ TypeEntry->emitType(OS);
+
+ // Emit string table.
+ uint32_t StringOffset = 0;
+ for (const auto &S : StringTable.getTable()) {
+ OS.AddComment("string offset=" + std::to_string(StringOffset));
+ OS.EmitBytes(S);
+ OS.EmitBytes(StringRef("\0", 1));
+ StringOffset += S.size() + 1;
+ }
+}
+
+void BTFDebug::emitBTFExtSection() {
+ MCContext &Ctx = OS.getContext();
+ OS.SwitchSection(Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0));
+
+ // Emit header.
+ emitCommonHeader();
+ OS.EmitIntValue(BTF::ExtHeaderSize, 4);
+
+ // Account for FuncInfo/LineInfo record size as well.
+ uint32_t FuncLen = 4, LineLen = 4;
+ for (const auto &FuncSec : FuncInfoTable) {
+ FuncLen += BTF::SecFuncInfoSize;
+ FuncLen += FuncSec.second.size() * BTF::BPFFuncInfoSize;
+ }
+ for (const auto &LineSec : LineInfoTable) {
+ LineLen += BTF::SecLineInfoSize;
+ LineLen += LineSec.second.size() * BTF::BPFLineInfoSize;
+ }
+
+ OS.EmitIntValue(0, 4);
+ OS.EmitIntValue(FuncLen, 4);
+ OS.EmitIntValue(FuncLen, 4);
+ OS.EmitIntValue(LineLen, 4);
+
+ // Emit func_info table.
+ OS.AddComment("FuncInfo");
+ OS.EmitIntValue(BTF::BPFFuncInfoSize, 4);
+ for (const auto &FuncSec : FuncInfoTable) {
+ OS.AddComment("FuncInfo section string offset=" +
+ std::to_string(FuncSec.first));
+ OS.EmitIntValue(FuncSec.first, 4);
+ OS.EmitIntValue(FuncSec.second.size(), 4);
+ for (const auto &FuncInfo : FuncSec.second) {
+ Asm->EmitLabelReference(FuncInfo.Label, 4);
+ OS.EmitIntValue(FuncInfo.TypeId, 4);
+ }
+ }
+
+ // Emit line_info table.
+ OS.AddComment("LineInfo");
+ OS.EmitIntValue(BTF::BPFLineInfoSize, 4);
+ for (const auto &LineSec : LineInfoTable) {
+ OS.AddComment("LineInfo section string offset=" +
+ std::to_string(LineSec.first));
+ OS.EmitIntValue(LineSec.first, 4);
+ OS.EmitIntValue(LineSec.second.size(), 4);
+ for (const auto &LineInfo : LineSec.second) {
+ Asm->EmitLabelReference(LineInfo.Label, 4);
+ OS.EmitIntValue(LineInfo.FileNameOff, 4);
+ OS.EmitIntValue(LineInfo.LineOff, 4);
+ OS.AddComment("Line " + std::to_string(LineInfo.LineNum) + " Col " +
+ std::to_string(LineInfo.ColumnNum));
+ OS.EmitIntValue(LineInfo.LineNum << 10 | LineInfo.ColumnNum, 4);
+ }
+ }
+}
+
+void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
+ auto *SP = MF->getFunction().getSubprogram();
+ auto *Unit = SP->getUnit();
+
+ if (Unit->getEmissionKind() == DICompileUnit::NoDebug) {
+ SkipInstruction = true;
+ return;
+ }
+ SkipInstruction = false;
+
+ // Collect all types locally referenced in this function.
+ // Use RetainedNodes so we can collect all argument names
+ // even if the argument is not used.
+ std::unordered_map<uint32_t, StringRef> FuncArgNames;
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ visitTypeEntry(DV->getType().resolve());
+
+ // Collect function arguments for subprogram func type.
+ uint32_t Arg = DV->getArg();
+ if (Arg)
+ FuncArgNames[Arg] = DV->getName();
+ }
+ }
+
+ // Construct subprogram func proto type.
+ uint32_t ProtoTypeId;
+ visitSubroutineType(SP->getType(), true, FuncArgNames, ProtoTypeId);
+
+ // Construct subprogram func type
+ auto FuncTypeEntry =
+ llvm::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId);
+ uint32_t FuncTypeId = addType(std::move(FuncTypeEntry));
+
+ // Construct funcinfo and the first lineinfo for the function.
+ MCSymbol *FuncLabel = Asm->getFunctionBegin();
+ BTFFuncInfo FuncInfo;
+ FuncInfo.Label = FuncLabel;
+ FuncInfo.TypeId = FuncTypeId;
+ if (FuncLabel->isInSection()) {
+ MCSection &Section = FuncLabel->getSection();
+ const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
+ assert(SectionELF && "Null section for Function Label");
+ SecNameOff = addString(SectionELF->getSectionName());
+ } else {
+ SecNameOff = addString(".text");
+ }
+ FuncInfoTable[SecNameOff].push_back(FuncInfo);
+}
+
+void BTFDebug::endFunctionImpl(const MachineFunction *MF) {
+ SkipInstruction = false;
+ LineInfoGenerated = false;
+ SecNameOff = 0;
+}
+
+void BTFDebug::beginInstruction(const MachineInstr *MI) {
+ DebugHandlerBase::beginInstruction(MI);
+
+ if (SkipInstruction || MI->isMetaInstruction() ||
+ MI->getFlag(MachineInstr::FrameSetup))
+ return;
+
+ if (MI->isInlineAsm()) {
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ ;
+
+ // Skip this inline asm instruction if the asmstr is empty.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ if (AsmStr[0] == 0)
+ return;
+ }
+
+ // Skip this instruction if no DebugLoc or the DebugLoc
+ // is the same as the previous instruction.
+ const DebugLoc &DL = MI->getDebugLoc();
+ if (!DL || PrevInstLoc == DL) {
+ // This instruction will be skipped; if no LineInfo has been
+ // generated yet, construct one based on the function signature.
+ if (LineInfoGenerated == false) {
+ auto *S = MI->getMF()->getFunction().getSubprogram();
+ MCSymbol *FuncLabel = Asm->getFunctionBegin();
+ constructLineInfo(S, FuncLabel, S->getLine(), 0);
+ LineInfoGenerated = true;
+ }
+
+ return;
+ }
+
+ // Create a temporary label to remember the insn for lineinfo.
+ MCSymbol *LineSym = OS.getContext().createTempSymbol();
+ OS.EmitLabel(LineSym);
+
+ // Construct the lineinfo.
+ auto SP = DL.get()->getScope()->getSubprogram();
+ constructLineInfo(SP, LineSym, DL.getLine(), DL.getCol());
+
+ LineInfoGenerated = true;
+ PrevInstLoc = DL;
+}
+
+void BTFDebug::endModule() {
+ // Collect all types referenced by globals.
+ const Module *M = MMI->getModule();
+ for (const DICompileUnit *CUNode : M->debug_compile_units()) {
+ for (const auto *GVE : CUNode->getGlobalVariables()) {
+ DIGlobalVariable *GV = GVE->getVariable();
+ visitTypeEntry(GV->getType().resolve());
+ }
+ }
+
+ // Complete BTF type cross references.
+ for (const auto &TypeEntry : TypeEntries)
+ TypeEntry->completeType(*this);
+
+ // Emit BTF sections.
+ emitBTFSection();
+ emitBTFExtSection();
+}
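
The line_info words emitted above pack the line and column into a single
32-bit value: the column occupies the low 10 bits and the line number the
remaining bits. A minimal sketch of that packing, using hypothetical helper
names (the mask on the column is added here for illustration and is not in
the patch):

    // Hypothetical helpers illustrating the .BTF.ext line_info packing:
    // column in the low 10 bits, line number in the upper bits.
    #include <cstdint>

    static uint32_t packLineCol(uint32_t Line, uint32_t Col) {
      return (Line << 10) | (Col & 0x3ff); // Column is assumed to fit in 10 bits.
    }
    static uint32_t lineOf(uint32_t Packed) { return Packed >> 10; }
    static uint32_t colOf(uint32_t Packed)  { return Packed & 0x3ff; }
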
diff --git a/contrib/llvm/lib/Target/BPF/BTFDebug.h b/contrib/llvm/lib/Target/BPF/BTFDebug.h
new file mode 100644
index 000000000000..afd4ed87f63d
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTFDebug.h
@@ -0,0 +1,285 @@
+//===- BTFDebug.h -----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains support for writing BTF debug info.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BTFDEBUG_H
+#define LLVM_LIB_TARGET_BPF_BTFDEBUG_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
+#include <unordered_map>
+#include "BTF.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class BTFDebug;
+class DIType;
+class MCStreamer;
+class MCSymbol;
+class MachineFunction;
+
+/// The base class for BTF type generation.
+class BTFTypeBase {
+protected:
+ uint8_t Kind;
+ uint32_t Id;
+ struct BTF::CommonType BTFType;
+
+public:
+ virtual ~BTFTypeBase() = default;
+ void setId(uint32_t Id) { this->Id = Id; }
+ uint32_t getId() { return Id; }
+ uint32_t roundupToBytes(uint32_t NumBits) { return (NumBits + 7) >> 3; }
+ /// Get the size of this BTF type entry.
+ virtual uint32_t getSize() { return BTF::CommonTypeSize; }
+ /// Complete BTF type generation after all related DebugInfo types
+ /// have been visited so that their BTF type ids are available
+ /// for cross reference.
+ virtual void completeType(BTFDebug &BDebug) {}
+ /// Emit types for this BTF type entry.
+ virtual void emitType(MCStreamer &OS);
+};
+
+/// Handle several derived types, including pointer, const,
+/// volatile, typedef and restrict.
+class BTFTypeDerived : public BTFTypeBase {
+ const DIDerivedType *DTy;
+
+public:
+ BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag);
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle struct or union forward declaration.
+class BTFTypeFwd : public BTFTypeBase {
+ StringRef Name;
+
+public:
+ BTFTypeFwd(StringRef Name, bool IsUnion);
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle int type.
+class BTFTypeInt : public BTFTypeBase {
+ StringRef Name;
+ uint32_t IntVal; ///< Encoding, offset, bits
+
+public:
+ BTFTypeInt(uint32_t Encoding, uint32_t SizeInBits, uint32_t OffsetInBits,
+ StringRef TypeName);
+ uint32_t getSize() { return BTFTypeBase::getSize() + sizeof(uint32_t); }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle enumeration type.
+class BTFTypeEnum : public BTFTypeBase {
+ const DICompositeType *ETy;
+ std::vector<struct BTF::BTFEnum> EnumValues;
+
+public:
+ BTFTypeEnum(const DICompositeType *ETy, uint32_t NumValues);
+ uint32_t getSize() {
+ return BTFTypeBase::getSize() + EnumValues.size() * BTF::BTFEnumSize;
+ }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle array type.
+class BTFTypeArray : public BTFTypeBase {
+ const DICompositeType *ATy;
+ struct BTF::BTFArray ArrayInfo;
+
+public:
+ BTFTypeArray(const DICompositeType *ATy);
+ uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle struct/union type.
+class BTFTypeStruct : public BTFTypeBase {
+ const DICompositeType *STy;
+ bool HasBitField;
+ std::vector<struct BTF::BTFMember> Members;
+
+public:
+ BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField,
+ uint32_t NumMembers);
+ uint32_t getSize() {
+ return BTFTypeBase::getSize() + Members.size() * BTF::BTFMemberSize;
+ }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle function prototypes, for both subprograms and function pointers.
+class BTFTypeFuncProto : public BTFTypeBase {
+ const DISubroutineType *STy;
+ std::unordered_map<uint32_t, StringRef> FuncArgNames;
+ std::vector<struct BTF::BTFParam> Parameters;
+
+public:
+ BTFTypeFuncProto(const DISubroutineType *STy, uint32_t NumParams,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames);
+ uint32_t getSize() {
+ return BTFTypeBase::getSize() + Parameters.size() * BTF::BTFParamSize;
+ }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle subprogram.
+class BTFTypeFunc : public BTFTypeBase {
+ StringRef Name;
+
+public:
+ BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId);
+ uint32_t getSize() { return BTFTypeBase::getSize(); }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// String table.
+class BTFStringTable {
+ /// String table size in bytes.
+ uint32_t Size;
+ /// A mapping from string table offset to the index
+ /// of the Table. It is used to avoid putting
+ /// duplicated strings in the table.
+ std::unordered_map<uint32_t, uint32_t> OffsetToIdMap;
+ /// A vector of strings to represent the string table.
+ std::vector<std::string> Table;
+
+public:
+ BTFStringTable() : Size(0) {}
+ uint32_t getSize() { return Size; }
+ std::vector<std::string> &getTable() { return Table; }
+ /// Add a string to the string table and return its offset
+ /// in the table.
+ uint32_t addString(StringRef S);
+};
+
+/// Represent one func and its type id.
+struct BTFFuncInfo {
+ const MCSymbol *Label; ///< Func MCSymbol
+ uint32_t TypeId; ///< Type id referring to .BTF type section
+};
+
+/// Represent one line info.
+struct BTFLineInfo {
+ MCSymbol *Label; ///< MCSymbol identifying insn for the lineinfo
+ uint32_t FileNameOff; ///< file name offset in the .BTF string table
+ uint32_t LineOff; ///< line offset in the .BTF string table
+ uint32_t LineNum; ///< the line number
+ uint32_t ColumnNum; ///< the column number
+};
+
+/// Collect and emit BTF information.
+class BTFDebug : public DebugHandlerBase {
+ MCStreamer &OS;
+ bool SkipInstruction;
+ bool LineInfoGenerated;
+ uint32_t SecNameOff;
+ uint32_t ArrayIndexTypeId;
+ BTFStringTable StringTable;
+ std::vector<std::unique_ptr<BTFTypeBase>> TypeEntries;
+ std::unordered_map<const DIType *, uint32_t> DIToIdMap;
+ std::unordered_map<uint32_t, std::vector<BTFFuncInfo>> FuncInfoTable;
+ std::unordered_map<uint32_t, std::vector<BTFLineInfo>> LineInfoTable;
+ StringMap<std::vector<std::string>> FileContent;
+
+ /// Add types to TypeEntries.
+ /// @{
+ /// Add types to TypeEntries and DIToIdMap.
+ void addType(std::unique_ptr<BTFTypeBase> TypeEntry, const DIType *Ty);
+ /// Add types to TypeEntries only and return type id.
+ uint32_t addType(std::unique_ptr<BTFTypeBase> TypeEntry);
+ /// @}
+
+ /// IR type visiting functions.
+ /// @{
+ void visitTypeEntry(const DIType *Ty);
+ void visitBasicType(const DIBasicType *BTy);
+ void visitSubroutineType(
+ const DISubroutineType *STy, bool ForSubprog,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames,
+ uint32_t &TypeId);
+ void visitFwdDeclType(const DICompositeType *CTy, bool IsUnion);
+ void visitCompositeType(const DICompositeType *CTy);
+ void visitStructType(const DICompositeType *STy, bool IsStruct);
+ void visitArrayType(const DICompositeType *ATy);
+ void visitEnumType(const DICompositeType *ETy);
+ void visitDerivedType(const DIDerivedType *DTy);
+ /// @}
+
+ /// Get the file content for the subprogram. Certain lines of the file
+ /// may later be put into the string table and referenced by line info.
+ std::string populateFileContent(const DISubprogram *SP);
+
+ /// Construct a line info.
+ void constructLineInfo(const DISubprogram *SP, MCSymbol *Label, uint32_t Line,
+ uint32_t Column);
+
+ /// Emit common header of .BTF and .BTF.ext sections.
+ void emitCommonHeader();
+
+ /// Emit the .BTF section.
+ void emitBTFSection();
+
+ /// Emit the .BTF.ext section.
+ void emitBTFExtSection();
+
+protected:
+ /// Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// Post process after all instructions in this function are processed.
+ void endFunctionImpl(const MachineFunction *MF) override;
+
+public:
+ BTFDebug(AsmPrinter *AP);
+
+ /// Get the special array index type id.
+ uint32_t getArrayIndexTypeId() {
+ assert(ArrayIndexTypeId);
+ return ArrayIndexTypeId;
+ }
+
+ /// Add string to the string table.
+ size_t addString(StringRef S) { return StringTable.addString(S); }
+
+ /// Get the type id for a particular DIType.
+ uint32_t getTypeId(const DIType *Ty) {
+ assert(Ty && "Invalid null Type");
+ assert(DIToIdMap.find(Ty) != DIToIdMap.end() &&
+ "DIType not added in the DIToIdMap");
+ return DIToIdMap[Ty];
+ }
+
+ void setSymbolSize(const MCSymbol *Symbol, uint64_t Size) override {}
+
+ /// Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI) override;
+
+ /// Complete all the types and emit the BTF sections.
+ void endModule() override;
+};
+
+} // end namespace llvm
+
+#endif
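
BTFStringTable models the BTF string section: strings are concatenated with
NUL terminators and referenced by their byte offset from the start of the
section, which is what addString returns. Below is a minimal, hypothetical
sketch of such an offset-based table; it is not the class declared above,
and the class name and the map-based dedup are invented for illustration:

    // Hypothetical offset-based string table: each added string is
    // NUL-terminated and identified by its byte offset.
    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    class SimpleStringTable {
      uint32_t Size = 0;                       // Total bytes, including NULs.
      std::map<std::string, uint32_t> Offsets; // Dedup: string -> offset.
      std::vector<std::string> Table;          // Strings in emission order.

    public:
      uint32_t add(const std::string &S) {
        auto It = Offsets.find(S);
        if (It != Offsets.end())
          return It->second;                   // Reuse the existing offset.
        uint32_t Off = Size;
        Offsets[S] = Off;
        Table.push_back(S);
        Size += S.size() + 1;                  // +1 for the trailing NUL.
        return Off;
      }
      uint32_t size() const { return Size; }
    };
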
diff --git a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index e7790ddb3d7e..9f80b762fe36 100644
--- a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "BPF.h"
-#include "BPFSubtarget.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCAsmInfo.h"
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 134e890dfe49..32e79d0f527e 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -12,6 +12,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdint>
@@ -50,6 +51,23 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case FK_Data_8:
return ELF::R_BPF_64_64;
case FK_Data_4:
+ // .BTF.ext generates FK_Data_4 relocations for
+ // insn offsets by creating temporary labels.
+ // The insn offset is within the code section and
+ // has already been fulfilled by applyFixup(). No
+ // further relocation is needed.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ if (A->getSymbol().isTemporary()) {
+ MCSection &Section = A->getSymbol().getSection();
+ const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
+ assert(SectionELF && "Null section for reloc symbol");
+
+ // The reloc symbol should be in text section.
+ unsigned Flags = SectionELF->getFlags();
+ if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR))
+ return ELF::R_BPF_NONE;
+ }
+ }
return ELF::R_BPF_64_32;
}
}
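
The change above returns R_BPF_NONE for FK_Data_4 fixups against temporary
labels that live in the text section, since applyFixup() has already written
the final insn offset. A sketch of that decision as a standalone predicate,
with a hypothetical helper name (not part of the patch):

    // Hypothetical helper: does a FK_Data_4 fixup against this symbol still
    // need a relocation? Temporary labels in an allocatable, executable
    // (i.e. text) section are already resolved, so they do not.
    #include "llvm/BinaryFormat/ELF.h"

    static bool needsDataRelocation(bool IsTemporarySym, unsigned SectionFlags) {
      bool InText = (SectionFlags & llvm::ELF::SHF_ALLOC) &&
                    (SectionFlags & llvm::ELF::SHF_EXECINSTR);
      return !(IsTemporarySym && InText);
    }
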
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 171f7f607ff4..af3ad5315253 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -30,8 +30,8 @@ public:
WeakRefDirective = "\t.weak\t";
UsesELFSectionDirectiveForBSS = true;
- HasSingleParameterDotFile = false;
- HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = true;
+ HasDotTypeDotSizeDirective = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 92bda224f3dc..2eb1f0fc8bd9 100644
--- a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -313,8 +313,6 @@ public:
bool iss30_2Imm() const { return true; }
bool iss29_3Imm() const { return true; }
bool iss27_2Imm() const { return CheckImmRange(27, 2, true, true, false); }
- bool iss10_0Imm() const { return CheckImmRange(10, 0, true, false, false); }
- bool iss10_6Imm() const { return CheckImmRange(10, 6, true, false, false); }
bool iss9_0Imm() const { return CheckImmRange(9, 0, true, false, false); }
bool iss8_0Imm() const { return CheckImmRange(8, 0, true, false, false); }
bool iss8_0Imm64() const { return CheckImmRange(8, 0, true, true, false); }
@@ -581,6 +579,7 @@ bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
case Match_MnemonicFail:
return Error(IDLoc, "unrecognized instruction");
case Match_InvalidOperand:
+ case Match_InvalidTiedOperand:
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0U) {
if (ErrorInfo >= InstOperands.size())
diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 1a619ebda84e..428b42eba30d 100644
--- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -9,7 +9,6 @@
#define DEBUG_TYPE "hexagon-disassembler"
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
@@ -118,6 +117,10 @@ DecodeGeneralDoubleLow8RegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@@ -146,62 +149,7 @@ static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder);
static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-
-static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<4>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<14>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<8>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<7>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<12>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<3>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<13>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<9>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<5>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
+#include "HexagonDepDecoders.h"
#include "HexagonGenDisassemblerTables.inc"
static MCDisassembler *createHexagonDisassembler(const Target &T,
@@ -664,6 +612,18 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
return (DecodeRegisterClass(Inst, RegNo >> 1, HvxWRDecoderTable));
}
+LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily.
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg HvxVQRDecoderTable[] = {
+ Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3,
+ Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7};
+
+ return DecodeRegisterClass(Inst, RegNo >> 2, HvxVQRDecoderTable);
+}
+
static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
index 6ec52d18cdc4..c18492da803b 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -15,33 +15,6 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
-#define Hexagon_POINTER_SIZE 4
-
-#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
-#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
-#define Hexagon_WordSize Hexagon_PointerSize
-#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
-
-// allocframe saves LR and FP on stack before allocating
-// a new stack frame. This takes 8 bytes.
-#define HEXAGON_LRFP_SIZE 8
-
-// Normal instruction size (in bytes).
-#define HEXAGON_INSTR_SIZE 4
-
-// Maximum number of words and instructions in a packet.
-#define HEXAGON_PACKET_SIZE 4
-#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
-// Minimum number of instructions in an end-loop packet.
-#define HEXAGON_PACKET_INNER_SIZE 2
-#define HEXAGON_PACKET_OUTER_SIZE 3
-// Maximum number of instructions in a packet before shuffling,
-// including a compound one or a duplex or an extender.
-#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)
-
-// Name of the global offset table as defined by the Hexagon ABI
-#define HEXAGON_GOT_SYM_NAME "_GLOBAL_OFFSET_TABLE_"
-
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
index 69e263a425f8..868353e18832 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -25,6 +25,9 @@ include "llvm/Target/Target.td"
include "HexagonDepArch.td"
// Hexagon ISA Extensions
+def ExtensionZReg: SubtargetFeature<"zreg", "UseZRegOps", "true",
+ "Hexagon ZReg extension instructions">;
+
def ExtensionHVX: SubtargetFeature<"hvx", "HexagonHVXVersion",
"Hexagon::ArchEnum::V60", "Hexagon HVX instructions">;
def ExtensionHVXV60: SubtargetFeature<"hvxv60", "HexagonHVXVersion",
@@ -32,10 +35,14 @@ def ExtensionHVXV60: SubtargetFeature<"hvxv60", "HexagonHVXVersion",
[ExtensionHVX]>;
def ExtensionHVXV62: SubtargetFeature<"hvxv62", "HexagonHVXVersion",
"Hexagon::ArchEnum::V62", "Hexagon HVX instructions",
- [ExtensionHVX,ExtensionHVXV60]>;
+ [ExtensionHVX, ExtensionHVXV60]>;
def ExtensionHVXV65: SubtargetFeature<"hvxv65", "HexagonHVXVersion",
"Hexagon::ArchEnum::V65", "Hexagon HVX instructions",
- [ExtensionHVX,ExtensionHVXV60, ExtensionHVXV62]>;
+ [ExtensionHVX, ExtensionHVXV60, ExtensionHVXV62]>;
+def ExtensionHVXV66: SubtargetFeature<"hvxv66", "HexagonHVXVersion",
+ "Hexagon::ArchEnum::V66", "Hexagon HVX instructions",
+ [ExtensionHVX, ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65,
+ ExtensionZReg]>;
def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
"true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
@@ -60,6 +67,9 @@ def FeatureDuplex: SubtargetFeature<"duplex", "EnableDuplex", "true",
"Enable generation of duplex instruction">;
def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19",
"true", "Reserve register R19">;
+def FeatureNoreturnStackElim: SubtargetFeature<"noreturn-stack-elim",
+ "NoreturnStackElim", "true",
+ "Eliminate stack allocation in a noreturn function when possible">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
@@ -78,6 +88,10 @@ def UseHVXV62 : Predicate<"HST->useHVXOps()">,
AssemblerPredicate<"ExtensionHVXV62">;
def UseHVXV65 : Predicate<"HST->useHVXOps()">,
AssemblerPredicate<"ExtensionHVXV65">;
+def UseHVXV66 : Predicate<"HST->useHVXOps()">,
+ AssemblerPredicate<"ExtensionHVXV66">;
+def UseZReg : Predicate<"HST->useZRegOps()">,
+ AssemblerPredicate<"ExtensionZReg">;
def Hvx64: HwMode<"+hvx-length64b">;
def Hvx128: HwMode<"+hvx-length128b">;
@@ -309,8 +323,6 @@ include "HexagonPatternsHVX.td"
include "HexagonPatternsV65.td"
include "HexagonDepMappings.td"
include "HexagonIntrinsics.td"
-include "HexagonMapAsm2IntrinV62.gen.td"
-include "HexagonMapAsm2IntrinV65.gen.td"
def HexagonInstrInfo : InstrInfo;
@@ -323,31 +335,31 @@ class Proc<string Name, SchedMachineModel Model,
: ProcessorModel<Name, Model, Features>;
def : Proc<"generic", HexagonModelV60,
- [ArchV4, ArchV5, ArchV55, ArchV60,
- FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
- FeaturePackets, FeatureSmallData]>;
-def : Proc<"hexagonv4", HexagonModelV4,
- [ArchV4,
+ [ArchV5, ArchV55, ArchV60,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
-def : Proc<"hexagonv5", HexagonModelV4,
- [ArchV4, ArchV5,
+def : Proc<"hexagonv5", HexagonModelV5,
+ [ArchV5,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv55", HexagonModelV55,
- [ArchV4, ArchV5, ArchV55,
+ [ArchV5, ArchV55,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv60", HexagonModelV60,
- [ArchV4, ArchV5, ArchV55, ArchV60,
+ [ArchV5, ArchV55, ArchV60,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv62", HexagonModelV62,
- [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62,
+ [ArchV5, ArchV55, ArchV60, ArchV62,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv65", HexagonModelV65,
- [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
+ [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
+ FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
+ FeatureNVS, FeaturePackets, FeatureSmallData]>;
+def : Proc<"hexagonv66", HexagonModelV66,
+ [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66,
FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
FeatureNVS, FeaturePackets, FeatureSmallData]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 0ac83ea7c5fc..f44fb16e2d8e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -755,7 +755,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCInstrInfo &MCII = *Subtarget->getInstrInfo();
if (MI->isBundle()) {
- assert(Subtarget->usePackets() && "Support for packets is disabled");
const MachineBasicBlock* MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator MII = MI->getIterator();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index ba255d30fede..1bdebe557a8c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1985,6 +1985,10 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
case Hexagon::S2_storeri_io:
V = int32_t(U);
break;
+ default:
+ // Opc is already checked above to be one of the three store instructions.
+ // This silences a -Wuninitialized false positive on GCC 5.4.
+ llvm_unreachable("Unexpected store opcode");
}
if (!isInt<8>(V))
return false;
@@ -2223,6 +2227,10 @@ bool BitSimplification::genBitSplit(MachineInstr *MI,
for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
// The number of leading zeros here should be the number of trailing
// non-zeros in RC.
+ unsigned SRC = MRI.getRegClass(S)->getID();
+ if (SRC != Hexagon::IntRegsRegClassID &&
+ SRC != Hexagon::DoubleRegsRegClassID)
+ continue;
if (!BT.has(S))
continue;
const BitTracker::RegisterCell &SC = BT.lookup(S);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 94aacbed6af6..92b6da871a4c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -93,11 +93,12 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
unsigned ID = RC.getID();
uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
- auto &HRI = static_cast<const HexagonRegisterInfo&>(TRI);
+ const auto &HRI = static_cast<const HexagonRegisterInfo&>(TRI);
bool IsSubLo = (Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo));
switch (ID) {
case Hexagon::DoubleRegsRegClassID:
case Hexagon::HvxWRRegClassID:
+ case Hexagon::HvxVQRRegClassID:
return IsSubLo ? BT::BitMask(0, RW-1)
: BT::BitMask(RW, 2*RW-1);
default:
@@ -114,9 +115,13 @@ uint16_t HexagonEvaluator::getPhysRegBitWidth(unsigned Reg) const {
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
using namespace Hexagon;
- for (auto &RC : {HvxVRRegClass, HvxWRRegClass, HvxQRRegClass})
- if (RC.contains(Reg))
- return TRI.getRegSizeInBits(RC);
+ const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ if (HST.useHVXOps()) {
+ for (auto &RC : {HvxVRRegClass, HvxWRRegClass, HvxQRRegClass,
+ HvxVQRRegClass})
+ if (RC.contains(Reg))
+ return TRI.getRegSizeInBits(RC);
+ }
// Default treatment for other physical registers.
if (const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg))
return TRI.getRegSizeInBits(*RC);
@@ -142,6 +147,8 @@ const TargetRegisterClass &HexagonEvaluator::composeWithSubRegIndex(
return Hexagon::IntRegsRegClass;
case Hexagon::HvxWRRegClassID:
return Hexagon::HvxVRRegClass;
+ case Hexagon::HvxVQRRegClassID:
+ return Hexagon::HvxWRRegClass;
default:
break;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index cbce61bc63c9..ba9f638796eb 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -376,7 +376,7 @@ namespace {
using IndexList = SetVector<unsigned>;
using ExtenderInit = std::pair<ExtValue, ExtExpr>;
using AssignmentMap = std::map<ExtenderInit, IndexList>;
- using LocDefMap = std::map<Loc, IndexList>;
+ using LocDefList = std::vector<std::pair<Loc, IndexList>>;
const HexagonInstrInfo *HII = nullptr;
const HexagonRegisterInfo *HRI = nullptr;
@@ -399,7 +399,7 @@ namespace {
void assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
AssignmentMap &IMap);
void calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
- LocDefMap &Defs);
+ LocDefList &Defs);
Register insertInitializer(Loc DefL, const ExtenderInit &ExtI);
bool replaceInstrExact(const ExtDesc &ED, Register ExtR);
bool replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
@@ -730,21 +730,13 @@ bool HCE::ExtRoot::operator< (const HCE::ExtRoot &ER) const {
}
case MachineOperand::MO_ExternalSymbol:
return StringRef(V.SymbolName) < StringRef(ER.V.SymbolName);
- case MachineOperand::MO_GlobalAddress: {
- // Global values may not have names, so compare their positions
- // in the parent module.
- const Module &M = *V.GV->getParent();
- auto FindPos = [&M] (const GlobalValue &V) {
- unsigned P = 0;
- for (const GlobalValue &T : M.global_values()) {
- if (&T == &V)
- return P;
- P++;
- }
- llvm_unreachable("Global value not found in module");
- };
- return FindPos(*V.GV) < FindPos(*ER.V.GV);
- }
+ case MachineOperand::MO_GlobalAddress:
+ // Do not use GUIDs, since they depend on the source path. Moving the
+ // source file to a different directory could cause different GUID
+ // values for a pair of given symbols. These symbols could then compare
+ // "less" in one directory, but "greater" in another.
+ assert(!V.GV->getName().empty() && !ER.V.GV->getName().empty());
+ return V.GV->getName() < ER.V.GV->getName();
case MachineOperand::MO_BlockAddress: {
const BasicBlock *ThisB = V.BA->getBasicBlock();
const BasicBlock *OtherB = ER.V.BA->getBasicBlock();
@@ -796,6 +788,7 @@ HCE::ExtValue::operator MachineOperand() const {
return MachineOperand::CreateCPI(V.ImmVal, Offset, TF);
case MachineOperand::MO_JumpTableIndex:
assert(Offset == 0);
+ return MachineOperand::CreateJTI(V.ImmVal, TF);
default:
llvm_unreachable("Unhandled kind");
}
@@ -1215,12 +1208,19 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) {
case Hexagon::S4_subaddi: // (__: ## - Rs<<0)
ED.Expr.Rs = MI.getOperand(OpNum+1);
ED.Expr.Neg = true;
+ break;
default: // (__: ## + __<<_)
break;
}
}
ED.UseMI = &MI;
+
+ // Ignore unnamed globals.
+ ExtRoot ER(ED.getOp());
+ if (ER.Kind == MachineOperand::MO_GlobalAddress)
+ if (ER.V.GV->getName().empty())
+ return;
Extenders.push_back(ED);
}
@@ -1243,9 +1243,13 @@ void HCE::collectInstr(MachineInstr &MI) {
void HCE::collect(MachineFunction &MF) {
Extenders.clear();
- for (MachineBasicBlock &MBB : MF)
+ for (MachineBasicBlock &MBB : MF) {
+ // Skip unreachable blocks.
+ if (MBB.getNumber() == -1)
+ continue;
for (MachineInstr &MI : MBB)
collectInstr(MI);
+ }
}
void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
@@ -1470,7 +1474,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
}
void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
- LocDefMap &Defs) {
+ LocDefList &Defs) {
if (Refs.empty())
return;
@@ -1517,7 +1521,7 @@ void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
It = DomB->getFirstTerminator();
}
Loc DefLoc(DomB, It);
- Defs.emplace(DefLoc, Refs);
+ Defs.emplace_back(DefLoc, Refs);
}
HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) {
@@ -1629,7 +1633,7 @@ bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) {
else
MIB.add(MachineOperand(ExtR));
}
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1680,7 +1684,7 @@ bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) {
// Add the stored value for stores.
if (MI.mayStore())
MIB.add(getStoredValueOp(MI));
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1715,6 +1719,15 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
// Clamp Diff to the 16 bit range.
int32_t D = isInt<16>(Diff) ? Diff : (Diff > 0 ? 32767 : -32768);
+ if (Diff > 32767) {
+ // Split Diff into two values: one close to the int16 min/max,
+ // the other being the remainder, such that both have the same
+ // "alignment" as Diff.
+ uint32_t UD = Diff;
+ OffsetRange R = getOffsetRange(MI.getOperand(0));
+ uint32_t A = std::min<uint32_t>(R.Align, 1u << countTrailingZeros(UD));
+ D &= ~(A-1);
+ }
BuildMI(MBB, At, dl, HII->get(IdxOpc))
.add(MI.getOperand(0))
.add(MachineOperand(ExtR))
@@ -1797,7 +1810,7 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
// Add the stored value for stores.
if (MI.mayStore())
MIB.add(getStoredValueOp(MI));
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1878,7 +1891,7 @@ bool HCE::replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI) {
}
bool HCE::replaceExtenders(const AssignmentMap &IMap) {
- LocDefMap Defs;
+ LocDefList Defs;
bool Changed = false;
for (const std::pair<ExtenderInit,IndexList> &P : IMap) {
@@ -1931,6 +1944,11 @@ const MachineOperand &HCE::getStoredValueOp(const MachineInstr &MI) const {
bool HCE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ if (MF.getFunction().hasPersonalityFn()) {
+ LLVM_DEBUG(dbgs() << getPassName() << ": skipping " << MF.getName()
+ << " due to exception handling\n");
+ return false;
+ }
LLVM_DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr));
HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
@@ -1940,10 +1958,24 @@ bool HCE::runOnMachineFunction(MachineFunction &MF) {
AssignmentMap IMap;
collect(MF);
- llvm::sort(Extenders.begin(), Extenders.end(),
- [](const ExtDesc &A, const ExtDesc &B) {
- return ExtValue(A) < ExtValue(B);
- });
+ llvm::sort(Extenders, [this](const ExtDesc &A, const ExtDesc &B) {
+ ExtValue VA(A), VB(B);
+ if (VA != VB)
+ return VA < VB;
+ const MachineInstr *MA = A.UseMI;
+ const MachineInstr *MB = B.UseMI;
+ if (MA == MB) {
+ // If it's the same instruction, compare operand numbers.
+ return A.OpNum < B.OpNum;
+ }
+
+ const MachineBasicBlock *BA = MA->getParent();
+ const MachineBasicBlock *BB = MB->getParent();
+ assert(BA->getNumber() != -1 && BB->getNumber() != -1);
+ if (BA != BB)
+ return BA->getNumber() < BB->getNumber();
+ return MDT->dominates(MA, MB);
+ });
bool Changed = false;
LLVM_DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n");
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 8f22a71dc1f3..fa192391313e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -2463,6 +2463,7 @@ APInt HexagonConstEvaluator::getCmpImm(unsigned Opc, unsigned OpX,
case Hexagon::A4_cmpheqi: // s8
case Hexagon::C4_cmpneqi: // s8
Signed = true;
+ break;
case Hexagon::A4_cmpbeqi: // u8
break;
case Hexagon::C2_cmpgtui: // u9
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index fccde96d8a32..28965b69e284 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -555,8 +555,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1,
if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex))
continue;
- // Check that the two instructions are combinable. V4 allows more
- // instructions to be merged into a combine.
+ // Check that the two instructions are combinable.
// The order matters because in a A2_tfrsi we might can encode a int8 as
// the hi reg operand but only a uint6 as the low reg operand.
if ((IsI2LowReg && !areCombinableOperations(TRI, I1, *I2, AllowC64)) ||
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h
index dc75f8f63400..dff2b2f471d0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -1,4 +1,4 @@
-//===- HexagonDepArch.h ---------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,12 +10,11 @@
//===----------------------------------------------------------------------===//
-
#ifndef HEXAGON_DEP_ARCH_H
#define HEXAGON_DEP_ARCH_H
namespace llvm {
namespace Hexagon {
-enum class ArchEnum { V4,V5,V55,V60,V62,V65 };
+enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66 };
} // namespace Hexagon
} // namespace llvm;
#endif // HEXAGON_DEP_ARCH_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td
index 3594379aa841..f1aadae555c8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td
@@ -1,4 +1,4 @@
-//===- HexagonDepArch.td --------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,8 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
+def ArchV66: SubtargetFeature<"v66", "HexagonArchVersion", "Hexagon::ArchEnum::V66", "Enable Hexagon V66 architecture">;
+def HasV66 : Predicate<"HST->hasV66Ops()">, AssemblerPredicate<"ArchV66">;
def ArchV65: SubtargetFeature<"v65", "HexagonArchVersion", "Hexagon::ArchEnum::V65", "Enable Hexagon V65 architecture">;
def HasV65 : Predicate<"HST->hasV65Ops()">, AssemblerPredicate<"ArchV65">;
def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "Hexagon::ArchEnum::V62", "Enable Hexagon V62 architecture">;
@@ -18,7 +19,5 @@ def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V
def HasV60 : Predicate<"HST->hasV60Ops()">, AssemblerPredicate<"ArchV60">;
def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "Hexagon::ArchEnum::V55", "Enable Hexagon V55 architecture">;
def HasV55 : Predicate<"HST->hasV55Ops()">, AssemblerPredicate<"ArchV55">;
-def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "Hexagon::ArchEnum::V4", "Enable Hexagon V4 architecture">;
-def HasV4 : Predicate<"HST->hasV4Ops()">, AssemblerPredicate<"ArchV4">;
def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "Hexagon::ArchEnum::V5", "Enable Hexagon V5 architecture">;
def HasV5 : Predicate<"HST->hasV5Ops()">, AssemblerPredicate<"ArchV5">;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h
new file mode 100644
index 000000000000..9f78412f45d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h
@@ -0,0 +1,79 @@
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, please consult code owner before editing.
+//===----------------------------------------------------------------------===//
+
+// clang-format off
+
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<4>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<14>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<8>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<7>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<12>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<3>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<13>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<9>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<5>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
+// clang-format on
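
Each decoder above delegates to signedDecoder<N>, which interprets an N-bit
encoded field as a signed immediate. Assuming the usual sign-extension
semantics, a generic sketch with a hypothetical helper name is:

    // Hypothetical helper showing N-bit sign extension: replicate bit N-1
    // into the upper bits of the result.
    #include <cstdint>

    template <unsigned N>
    static int64_t signExtendNBits(uint64_t Value) {
      static_assert(N > 0 && N < 64, "field width must be 1..63 bits");
      const uint64_t SignBit = 1ULL << (N - 1);
      const uint64_t Mask = (1ULL << N) - 1;
      Value &= Mask;
      return static_cast<int64_t>((Value ^ SignBit) - SignBit);
    }
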
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
index b27cdae81a28..9e3dea9f3e9b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -1,4 +1,4 @@
-//===- HexagonDepIICHVX.td ------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,1849 +9,2549 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
-def tc_0317c6ca : InstrItinClass;
-def tc_1b93bdc6 : InstrItinClass;
-def tc_2171ebae : InstrItinClass;
-def tc_28978789 : InstrItinClass;
-def tc_29841470 : InstrItinClass;
-def tc_316c637c : InstrItinClass;
-def tc_354299ad : InstrItinClass;
-def tc_35e92f8e : InstrItinClass;
-def tc_38208312 : InstrItinClass;
-def tc_4105d6b5 : InstrItinClass;
-def tc_41f4b64e : InstrItinClass;
-def tc_41f99e1c : InstrItinClass;
-def tc_45453b98 : InstrItinClass;
-def tc_4e2a5159 : InstrItinClass;
-def tc_4f190ba3 : InstrItinClass;
-def tc_4fd8566e : InstrItinClass;
-def tc_51cd3aab : InstrItinClass;
-def tc_5a9fc4ec : InstrItinClass;
-def tc_5c03dc63 : InstrItinClass;
-def tc_5c120602 : InstrItinClass;
-def tc_5cbf490b : InstrItinClass;
-def tc_63e3d94c : InstrItinClass;
-def tc_644584f8 : InstrItinClass;
-def tc_66bb62ea : InstrItinClass;
-def tc_69b6dd20 : InstrItinClass;
-def tc_6b78cf13 : InstrItinClass;
-def tc_6fd9ad30 : InstrItinClass;
-def tc_71337255 : InstrItinClass;
-def tc_72ad7b54 : InstrItinClass;
-def tc_7474003e : InstrItinClass;
-def tc_77a4c701 : InstrItinClass;
-def tc_7c3f55c4 : InstrItinClass;
-def tc_7e9f581b : InstrItinClass;
-def tc_7fa82b08 : InstrItinClass;
-def tc_7fa8b40f : InstrItinClass;
-def tc_85d237e3 : InstrItinClass;
-def tc_8a6eb39a : InstrItinClass;
-def tc_8b6a873f : InstrItinClass;
-def tc_908a4c8c : InstrItinClass;
-def tc_9311da3f : InstrItinClass;
-def tc_94f43c04 : InstrItinClass;
-def tc_9777e6bf : InstrItinClass;
-def tc_97c165b9 : InstrItinClass;
-def tc_98733e9d : InstrItinClass;
-def tc_99093773 : InstrItinClass;
-def tc_9b9642a1 : InstrItinClass;
-def tc_9c267309 : InstrItinClass;
-def tc_a3127e12 : InstrItinClass;
-def tc_a4c9df3b : InstrItinClass;
-def tc_a807365d : InstrItinClass;
-def tc_aedb9f9e : InstrItinClass;
-def tc_b06ab583 : InstrItinClass;
-def tc_b712833a : InstrItinClass;
-def tc_b77635b4 : InstrItinClass;
-def tc_bbaf280e : InstrItinClass;
-def tc_bf142ae2 : InstrItinClass;
-def tc_bfe309d5 : InstrItinClass;
-def tc_c00bf9c9 : InstrItinClass;
-def tc_c4b515c5 : InstrItinClass;
-def tc_cbf6d1dc : InstrItinClass;
-def tc_cedf314b : InstrItinClass;
-def tc_d2cb81ea : InstrItinClass;
-def tc_d5090f3e : InstrItinClass;
-def tc_d642eff3 : InstrItinClass;
-def tc_d725e5b0 : InstrItinClass;
-def tc_d7bea0ec : InstrItinClass;
-def tc_d98f4d63 : InstrItinClass;
-def tc_da979fb3 : InstrItinClass;
-def tc_db5b9e2f : InstrItinClass;
-def tc_df54ad52 : InstrItinClass;
-def tc_e172d86a : InstrItinClass;
-def tc_e231aa4f : InstrItinClass;
-def tc_e3748cdf : InstrItinClass;
-def tc_e5053c8f : InstrItinClass;
-def tc_e6299d16 : InstrItinClass;
-def tc_eb669007 : InstrItinClass;
-def tc_ec58f88a : InstrItinClass;
-def tc_eda67dcd : InstrItinClass;
-def tc_ee927c0e : InstrItinClass;
-def tc_f3fc3f83 : InstrItinClass;
-def tc_fa99dc24 : InstrItinClass;
+def tc_04da405a : InstrItinClass;
+def tc_05058f6f : InstrItinClass;
+def tc_05ac6f98 : InstrItinClass;
+def tc_05ca8cfd : InstrItinClass;
+def tc_08a4f1b6 : InstrItinClass;
+def tc_0b04c6c7 : InstrItinClass;
+def tc_0ec46cf9 : InstrItinClass;
+def tc_131f1c81 : InstrItinClass;
+def tc_1381a97c : InstrItinClass;
+def tc_15fdf750 : InstrItinClass;
+def tc_16ff9ef8 : InstrItinClass;
+def tc_191381c1 : InstrItinClass;
+def tc_1ad8a370 : InstrItinClass;
+def tc_1ba8a0cd : InstrItinClass;
+def tc_20a4bbec : InstrItinClass;
+def tc_257f6f7c : InstrItinClass;
+def tc_26a377fe : InstrItinClass;
+def tc_2c745bb8 : InstrItinClass;
+def tc_2d4051cd : InstrItinClass;
+def tc_2e8f5f6e : InstrItinClass;
+def tc_309dbb4f : InstrItinClass;
+def tc_3904b926 : InstrItinClass;
+def tc_3aacf4a8 : InstrItinClass;
+def tc_3ad719fb : InstrItinClass;
+def tc_3c56e5ce : InstrItinClass;
+def tc_3ce09744 : InstrItinClass;
+def tc_3e2aaafc : InstrItinClass;
+def tc_447d9895 : InstrItinClass;
+def tc_453fe68d : InstrItinClass;
+def tc_46d6c3e0 : InstrItinClass;
+def tc_51d0ecc3 : InstrItinClass;
+def tc_52447ecc : InstrItinClass;
+def tc_540c3da3 : InstrItinClass;
+def tc_54a0dc47 : InstrItinClass;
+def tc_561aaa58 : InstrItinClass;
+def tc_56c4f9fe : InstrItinClass;
+def tc_56e64202 : InstrItinClass;
+def tc_58d21193 : InstrItinClass;
+def tc_5bf8afbb : InstrItinClass;
+def tc_61bf7c03 : InstrItinClass;
+def tc_649072c2 : InstrItinClass;
+def tc_660769f1 : InstrItinClass;
+def tc_663c80a7 : InstrItinClass;
+def tc_6942b6e0 : InstrItinClass;
+def tc_6e7fa133 : InstrItinClass;
+def tc_71646d06 : InstrItinClass;
+def tc_7177e272 : InstrItinClass;
+def tc_718b5c53 : InstrItinClass;
+def tc_7273323b : InstrItinClass;
+def tc_7417e785 : InstrItinClass;
+def tc_767c4e9d : InstrItinClass;
+def tc_7e6a3e89 : InstrItinClass;
+def tc_8772086c : InstrItinClass;
+def tc_87adc037 : InstrItinClass;
+def tc_8e420e4d : InstrItinClass;
+def tc_90bcc1db : InstrItinClass;
+def tc_933f2b39 : InstrItinClass;
+def tc_946013d8 : InstrItinClass;
+def tc_9d1dc972 : InstrItinClass;
+def tc_9f363d21 : InstrItinClass;
+def tc_a02a10a8 : InstrItinClass;
+def tc_a0dbea28 : InstrItinClass;
+def tc_a7e6707d : InstrItinClass;
+def tc_ab23f776 : InstrItinClass;
+def tc_abe8c3b2 : InstrItinClass;
+def tc_ac4046bc : InstrItinClass;
+def tc_af25efd9 : InstrItinClass;
+def tc_b091f1c6 : InstrItinClass;
+def tc_b28e51aa : InstrItinClass;
+def tc_b4416217 : InstrItinClass;
+def tc_b9db8205 : InstrItinClass;
+def tc_c0749f3c : InstrItinClass;
+def tc_c127de3a : InstrItinClass;
+def tc_c4edf264 : InstrItinClass;
+def tc_c5dba46e : InstrItinClass;
+def tc_c7039829 : InstrItinClass;
+def tc_cd94bfe0 : InstrItinClass;
+def tc_d8287c14 : InstrItinClass;
+def tc_db5555f3 : InstrItinClass;
+def tc_dd5b0695 : InstrItinClass;
+def tc_df80eeb0 : InstrItinClass;
+def tc_e2d2e9e5 : InstrItinClass;
+def tc_e35c1e93 : InstrItinClass;
+def tc_e3f68a46 : InstrItinClass;
+def tc_e675c45a : InstrItinClass;
+def tc_e699ae41 : InstrItinClass;
+def tc_e8797b98 : InstrItinClass;
+def tc_e99d4c2e : InstrItinClass;
+def tc_f1de44ef : InstrItinClass;
+def tc_f21e8abb : InstrItinClass;
+def tc_fd7610da : InstrItinClass;
class DepHVXItinV55 {
list<InstrItinData> DepHVXItinV55_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
[HVX_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL]>], [3],
[HVX_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
+class DepHVXItinV60 {
+ list<InstrItinData> DepHVXItinV60_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLSHF]>], [9, 5],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL]>], [],
[]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
-class DepHVXItinV60 {
- list<InstrItinData> DepHVXItinV60_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
+ InstrStage<1, [CVI_ALL]>], [3, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_8772086c, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ InstrStage<1, [CVI_ALL]>], [3],
[HVX_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+class DepHVXItinV62 {
+ list<InstrItinData> DepHVXItinV62_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VS*/
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [2],
- [Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3],
- [HVX_FWD]>,
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [],
- []>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-class DepHVXItinV62 {
- list<InstrItinData> DepHVXItinV62_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
+ InstrStage<1, [CVI_ALL]>], [3, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ InstrStage<1, [CVI_ALL]>], [3],
[HVX_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+class DepHVXItinV65 {
+ list<InstrItinData> DepHVXItinV65_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VA*/
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [2],
- [Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3],
- [HVX_FWD]>,
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [],
- []>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-class DepHVXItinV65 {
- list<InstrItinData> DepHVXItinV65_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
+class DepHVXItinV66 {
+ list<InstrItinData> DepHVXItinV66_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
- [HVX_FWD]>,
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VA*/
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [2],
- [Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3],
- [HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [],
- []>,
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
];
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
index 931504b56ccb..9da25952fb1c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -1,4 +1,4 @@
-//===- HexagonDepIICScalar.td ---------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,3087 +9,3789 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
-def tc_00afc57e : InstrItinClass;
-def tc_00e7c26e : InstrItinClass;
-def tc_03220ffa : InstrItinClass;
-def tc_038a1342 : InstrItinClass;
-def tc_04c9decc : InstrItinClass;
-def tc_05b6c987 : InstrItinClass;
-def tc_0cd51c76 : InstrItinClass;
-def tc_0dc560de : InstrItinClass;
-def tc_0fc1ae07 : InstrItinClass;
-def tc_10b97e27 : InstrItinClass;
-def tc_1372bca1 : InstrItinClass;
-def tc_14cd4cfa : InstrItinClass;
-def tc_15411484 : InstrItinClass;
-def tc_16d0d8d5 : InstrItinClass;
-def tc_181af5d0 : InstrItinClass;
-def tc_1853ea6d : InstrItinClass;
-def tc_1b82a277 : InstrItinClass;
-def tc_1b9c9ee5 : InstrItinClass;
-def tc_1d5a38a8 : InstrItinClass;
-def tc_1e856f58 : InstrItinClass;
-def tc_234a11a5 : InstrItinClass;
-def tc_238d91d2 : InstrItinClass;
-def tc_29175780 : InstrItinClass;
-def tc_2a160009 : InstrItinClass;
-def tc_2b2f4060 : InstrItinClass;
-def tc_2b6f77c6 : InstrItinClass;
-def tc_2f185f5c : InstrItinClass;
-def tc_2fc0c436 : InstrItinClass;
-def tc_351fed2d : InstrItinClass;
-def tc_3669266a : InstrItinClass;
-def tc_367f7f3d : InstrItinClass;
-def tc_36c68ad1 : InstrItinClass;
-def tc_395dc00f : InstrItinClass;
-def tc_3bc2c5d3 : InstrItinClass;
-def tc_3cb8ea06 : InstrItinClass;
-def tc_3d04548d : InstrItinClass;
-def tc_3da80ba5 : InstrItinClass;
-def tc_3e07fb90 : InstrItinClass;
-def tc_41d5298e : InstrItinClass;
-def tc_4403ca65 : InstrItinClass;
-def tc_44126683 : InstrItinClass;
-def tc_452f85af : InstrItinClass;
-def tc_481e5e5c : InstrItinClass;
-def tc_49eb22c8 : InstrItinClass;
-def tc_4ca572d4 : InstrItinClass;
-def tc_4d9914c9 : InstrItinClass;
-def tc_4d99bca9 : InstrItinClass;
-def tc_4f7cd700 : InstrItinClass;
-def tc_513bef45 : InstrItinClass;
-def tc_51b866be : InstrItinClass;
-def tc_523fcf30 : InstrItinClass;
-def tc_5274e61a : InstrItinClass;
-def tc_52d7bbea : InstrItinClass;
-def tc_53bc8a6a : InstrItinClass;
-def tc_53bdb2f6 : InstrItinClass;
-def tc_540fdfbc : InstrItinClass;
-def tc_55050d58 : InstrItinClass;
-def tc_57288781 : InstrItinClass;
-def tc_594ab548 : InstrItinClass;
-def tc_59a01ead : InstrItinClass;
-def tc_5acef64a : InstrItinClass;
-def tc_5ba5997d : InstrItinClass;
-def tc_5eb851fc : InstrItinClass;
-def tc_5f6847a1 : InstrItinClass;
-def tc_60571023 : InstrItinClass;
-def tc_609d2efe : InstrItinClass;
-def tc_63fe3df7 : InstrItinClass;
-def tc_66888ded : InstrItinClass;
-def tc_6792d5ff : InstrItinClass;
-def tc_681a2300 : InstrItinClass;
-def tc_68cb12ce : InstrItinClass;
-def tc_6aa5711a : InstrItinClass;
-def tc_6ac37025 : InstrItinClass;
-def tc_6ebb4a12 : InstrItinClass;
-def tc_6efc556e : InstrItinClass;
-def tc_6fa4db47 : InstrItinClass;
-def tc_73043bf4 : InstrItinClass;
-def tc_746baa8e : InstrItinClass;
-def tc_74e47fd9 : InstrItinClass;
-def tc_7934b9df : InstrItinClass;
-def tc_7a830544 : InstrItinClass;
-def tc_7f881c76 : InstrItinClass;
-def tc_84df2cd3 : InstrItinClass;
-def tc_855b0b61 : InstrItinClass;
-def tc_87735c3b : InstrItinClass;
-def tc_897d1a9d : InstrItinClass;
-def tc_8b15472a : InstrItinClass;
-def tc_8fd5f294 : InstrItinClass;
-def tc_8fe6b782 : InstrItinClass;
-def tc_90f3e30c : InstrItinClass;
-def tc_976ddc4f : InstrItinClass;
-def tc_97743097 : InstrItinClass;
-def tc_994333cd : InstrItinClass;
-def tc_999d32db : InstrItinClass;
-def tc_99be14ca : InstrItinClass;
-def tc_9c00ce8d : InstrItinClass;
-def tc_9c98e8af : InstrItinClass;
-def tc_9d5941c7 : InstrItinClass;
-def tc_9ef61e5c : InstrItinClass;
-def tc_9faf76ae : InstrItinClass;
-def tc_9fdb5406 : InstrItinClass;
-def tc_a21dc435 : InstrItinClass;
-def tc_a27582fa : InstrItinClass;
-def tc_a46f0df5 : InstrItinClass;
-def tc_a788683e : InstrItinClass;
-def tc_a8acdac0 : InstrItinClass;
-def tc_a904d137 : InstrItinClass;
-def tc_adb14c66 : InstrItinClass;
-def tc_b13761ae : InstrItinClass;
-def tc_b166348b : InstrItinClass;
-def tc_b44c6e2a : InstrItinClass;
-def tc_b77c481f : InstrItinClass;
-def tc_b7dd427e : InstrItinClass;
-def tc_b9488031 : InstrItinClass;
-def tc_b9c0b731 : InstrItinClass;
-def tc_b9c4623f : InstrItinClass;
-def tc_bad2bcaf : InstrItinClass;
-def tc_bcc96cee : InstrItinClass;
-def tc_bde7aaf4 : InstrItinClass;
-def tc_be706f30 : InstrItinClass;
-def tc_c2f7d806 : InstrItinClass;
-def tc_c5e2426d : InstrItinClass;
-def tc_c6aa82f7 : InstrItinClass;
-def tc_c6ce9b3f : InstrItinClass;
-def tc_c6ebf8dd : InstrItinClass;
-def tc_c74f796f : InstrItinClass;
-def tc_c82dc1ff : InstrItinClass;
-def tc_caaebcba : InstrItinClass;
-def tc_cd7374a0 : InstrItinClass;
-def tc_cde8b071 : InstrItinClass;
-def tc_cf47a43f : InstrItinClass;
-def tc_cf59f215 : InstrItinClass;
-def tc_d088982c : InstrItinClass;
-def tc_d1090e34 : InstrItinClass;
-def tc_d24b2d85 : InstrItinClass;
-def tc_d580173f : InstrItinClass;
-def tc_d6bf0472 : InstrItinClass;
-def tc_d9709180 : InstrItinClass;
-def tc_d9f95eef : InstrItinClass;
-def tc_daa058fa : InstrItinClass;
-def tc_dbdffe3d : InstrItinClass;
-def tc_e0739b8c : InstrItinClass;
-def tc_e1e99bfa : InstrItinClass;
-def tc_e216a5db : InstrItinClass;
-def tc_e421e012 : InstrItinClass;
-def tc_e7624c08 : InstrItinClass;
-def tc_e7d02c66 : InstrItinClass;
-def tc_e913dc32 : InstrItinClass;
-def tc_e9c822f7 : InstrItinClass;
-def tc_e9fae2d6 : InstrItinClass;
-def tc_ef52ed71 : InstrItinClass;
-def tc_ef84f62f : InstrItinClass;
-def tc_f2704b9a : InstrItinClass;
-def tc_f3eaa14b : InstrItinClass;
-def tc_f47d212f : InstrItinClass;
-def tc_f49e76f4 : InstrItinClass;
-def tc_f7dd9c9f : InstrItinClass;
-def tc_f86c328a : InstrItinClass;
-def tc_f8eeed7a : InstrItinClass;
-def tc_fcab4871 : InstrItinClass;
-def tc_ff9ee76e : InstrItinClass;
-
-class DepScalarItinV4 {
- list<InstrItinData> DepScalarItinV4_list = [
- InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_00e7c26e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_03220ffa, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_038a1342, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_04c9decc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_05b6c987, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0cd51c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0dc560de, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0fc1ae07, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_10b97e27, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1372bca1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_14cd4cfa, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_15411484, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_16d0d8d5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_181af5d0, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1853ea6d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1b82a277, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1b9c9ee5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1d5a38a8, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1e856f58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_234a11a5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_238d91d2, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_29175780, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_2a160009, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_2b2f4060, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_2b6f77c6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2f185f5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2fc0c436, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_351fed2d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3669266a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_367f7f3d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_36c68ad1, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_395dc00f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3bc2c5d3, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3cb8ea06, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3d04548d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3da80ba5, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3e07fb90, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_41d5298e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4403ca65, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_44126683, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_452f85af, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_481e5e5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_49eb22c8, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4ca572d4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_4d9914c9, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_4d99bca9, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4f7cd700, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_513bef45, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_51b866be, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_523fcf30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_5274e61a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_52d7bbea, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bc8a6a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bdb2f6, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_540fdfbc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_55050d58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_57288781, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_594ab548, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_59a01ead, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_5acef64a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_5ba5997d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_5eb851fc, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_5f6847a1, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_60571023, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_609d2efe, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_63fe3df7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_66888ded, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_6792d5ff, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_681a2300, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_68cb12ce, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6aa5711a, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ac37025, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ebb4a12, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6efc556e, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6fa4db47, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_73043bf4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_746baa8e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_74e47fd9, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_7934b9df, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_7a830544, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_7f881c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_84df2cd3, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_855b0b61, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_87735c3b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_897d1a9d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8b15472a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_8fd5f294, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8fe6b782, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_90f3e30c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_976ddc4f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_97743097, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_994333cd, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_999d32db, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_99be14ca, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c00ce8d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c98e8af, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9d5941c7, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_9ef61e5c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9faf76ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_9fdb5406, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a21dc435, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_a27582fa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a46f0df5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a788683e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a8acdac0, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_a904d137, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_adb14c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b13761ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_b166348b, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b44c6e2a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b77c481f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b7dd427e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b9488031, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c0b731, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c4623f, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_bad2bcaf, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_bcc96cee, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_bde7aaf4, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_be706f30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c2f7d806, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c5e2426d, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_c6aa82f7, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ce9b3f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ebf8dd, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c74f796f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c82dc1ff, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_caaebcba, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cd7374a0, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cde8b071, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cf47a43f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cf59f215, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_d088982c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d1090e34, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d24b2d85, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_d580173f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d6bf0472, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_d9709180, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d9f95eef, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_daa058fa, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_dbdffe3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e0739b8c, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_e1e99bfa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e216a5db, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e421e012, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7624c08, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7d02c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e913dc32, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e9c822f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_e9fae2d6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_ef52ed71, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_ef84f62f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f2704b9a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f3eaa14b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f47d212f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f49e76f4, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f7dd9c9f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_f86c328a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f8eeed7a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_fcab4871, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_ff9ee76e, [InstrStage<1, [SLOT0]>]> ];
-}
+def tc_002cb246 : InstrItinClass;
+def tc_0371abea : InstrItinClass;
+def tc_05c070ec : InstrItinClass;
+def tc_05d3a09b : InstrItinClass;
+def tc_0663f615 : InstrItinClass;
+def tc_096199d3 : InstrItinClass;
+def tc_0a705168 : InstrItinClass;
+def tc_0ae0825c : InstrItinClass;
+def tc_0b2be201 : InstrItinClass;
+def tc_0d8f5752 : InstrItinClass;
+def tc_13bfbcf9 : InstrItinClass;
+def tc_14b272fa : InstrItinClass;
+def tc_14b5c689 : InstrItinClass;
+def tc_15aa71c5 : InstrItinClass;
+def tc_174516e8 : InstrItinClass;
+def tc_17e0d2cd : InstrItinClass;
+def tc_1a2fd869 : InstrItinClass;
+def tc_1ad90acd : InstrItinClass;
+def tc_1ae57e39 : InstrItinClass;
+def tc_1b6f7cec : InstrItinClass;
+def tc_1c4528a2 : InstrItinClass;
+def tc_1c80410a : InstrItinClass;
+def tc_1d81e60e : InstrItinClass;
+def tc_1fc97744 : InstrItinClass;
+def tc_20cdee80 : InstrItinClass;
+def tc_2332b92e : InstrItinClass;
+def tc_24b66c99 : InstrItinClass;
+def tc_25a78932 : InstrItinClass;
+def tc_2b8da4c2 : InstrItinClass;
+def tc_2eabeebe : InstrItinClass;
+def tc_2f7c551d : InstrItinClass;
+def tc_2ff964b4 : InstrItinClass;
+def tc_30b9bb4a : InstrItinClass;
+def tc_32779c6f : InstrItinClass;
+def tc_36153880 : InstrItinClass;
+def tc_362c6592 : InstrItinClass;
+def tc_3962fa26 : InstrItinClass;
+def tc_39dfefe8 : InstrItinClass;
+def tc_3a867367 : InstrItinClass;
+def tc_3b470976 : InstrItinClass;
+def tc_3b5b7ef9 : InstrItinClass;
+def tc_3bd75825 : InstrItinClass;
+def tc_3c76b0ff : InstrItinClass;
+def tc_3d495a39 : InstrItinClass;
+def tc_40116ca8 : InstrItinClass;
+def tc_434c8e1e : InstrItinClass;
+def tc_4414d8b1 : InstrItinClass;
+def tc_44d3da28 : InstrItinClass;
+def tc_4560740b : InstrItinClass;
+def tc_4837eefb : InstrItinClass;
+def tc_49a8207d : InstrItinClass;
+def tc_4ae7b58b : InstrItinClass;
+def tc_4b68bce4 : InstrItinClass;
+def tc_4c5ba658 : InstrItinClass;
+def tc_4d5fa3a1 : InstrItinClass;
+def tc_53559e35 : InstrItinClass;
+def tc_56336eb0 : InstrItinClass;
+def tc_56f114f4 : InstrItinClass;
+def tc_57890846 : InstrItinClass;
+def tc_5a2711e5 : InstrItinClass;
+def tc_5abb5e3f : InstrItinClass;
+def tc_5aee39f7 : InstrItinClass;
+def tc_5b54b33f : InstrItinClass;
+def tc_5b7c0967 : InstrItinClass;
+def tc_5bf126a6 : InstrItinClass;
+def tc_5d7f5414 : InstrItinClass;
+def tc_5ef37dc4 : InstrItinClass;
+def tc_6132ba3d : InstrItinClass;
+def tc_61830035 : InstrItinClass;
+def tc_640086b5 : InstrItinClass;
+def tc_643b4717 : InstrItinClass;
+def tc_67435e81 : InstrItinClass;
+def tc_675e4897 : InstrItinClass;
+def tc_679309b8 : InstrItinClass;
+def tc_6b25e783 : InstrItinClass;
+def tc_703e822c : InstrItinClass;
+def tc_7186d325 : InstrItinClass;
+def tc_7646c131 : InstrItinClass;
+def tc_76851da1 : InstrItinClass;
+def tc_779080bf : InstrItinClass;
+def tc_784490da : InstrItinClass;
+def tc_785f65a7 : InstrItinClass;
+def tc_7a91e76a : InstrItinClass;
+def tc_838b34ea : InstrItinClass;
+def tc_85c9c08f : InstrItinClass;
+def tc_85d5d03f : InstrItinClass;
+def tc_862b3e70 : InstrItinClass;
+def tc_88b4f13d : InstrItinClass;
+def tc_89e94ad3 : InstrItinClass;
+def tc_8b121f4a : InstrItinClass;
+def tc_8b3e402a : InstrItinClass;
+def tc_8c945be0 : InstrItinClass;
+def tc_8c99de45 : InstrItinClass;
+def tc_8d9d0154 : InstrItinClass;
+def tc_8fb7ab1b : InstrItinClass;
+def tc_9461ff31 : InstrItinClass;
+def tc_946df596 : InstrItinClass;
+def tc_9ad9998f : InstrItinClass;
+def tc_9bfd761f : InstrItinClass;
+def tc_9c3ecd83 : InstrItinClass;
+def tc_9ca930f7 : InstrItinClass;
+def tc_9da59d12 : InstrItinClass;
+def tc_9debc299 : InstrItinClass;
+def tc_9e313203 : InstrItinClass;
+def tc_9fc3dae0 : InstrItinClass;
+def tc_a1123dda : InstrItinClass;
+def tc_a1c00888 : InstrItinClass;
+def tc_a58fd5cc : InstrItinClass;
+def tc_a5d4aeec : InstrItinClass;
+def tc_a6b1eca9 : InstrItinClass;
+def tc_a813cf9a : InstrItinClass;
+def tc_a9d88b22 : InstrItinClass;
+def tc_ae53734a : InstrItinClass;
+def tc_b31c2e97 : InstrItinClass;
+def tc_b343892a : InstrItinClass;
+def tc_b43e7930 : InstrItinClass;
+def tc_b4407292 : InstrItinClass;
+def tc_b44ecf75 : InstrItinClass;
+def tc_b4b5c03a : InstrItinClass;
+def tc_b51dc29a : InstrItinClass;
+def tc_b83e6d73 : InstrItinClass;
+def tc_b857bf4e : InstrItinClass;
+def tc_b8bffe55 : InstrItinClass;
+def tc_b90a29b1 : InstrItinClass;
+def tc_b9272d6c : InstrItinClass;
+def tc_b9e09e03 : InstrItinClass;
+def tc_bab0eed9 : InstrItinClass;
+def tc_bafaade3 : InstrItinClass;
+def tc_bcf98408 : InstrItinClass;
+def tc_bd8382d1 : InstrItinClass;
+def tc_bdceeac1 : InstrItinClass;
+def tc_be9602ff : InstrItinClass;
+def tc_bf061958 : InstrItinClass;
+def tc_bfec0f01 : InstrItinClass;
+def tc_c4db48cb : InstrItinClass;
+def tc_c4f596e3 : InstrItinClass;
+def tc_c79a189f : InstrItinClass;
+def tc_c8ce0b5c : InstrItinClass;
+def tc_cd374165 : InstrItinClass;
+def tc_cf8126ae : InstrItinClass;
+def tc_cfd8378a : InstrItinClass;
+def tc_d08ee0f4 : InstrItinClass;
+def tc_d1aa9eaa : InstrItinClass;
+def tc_d2e63d61 : InstrItinClass;
+def tc_d5b7b0c1 : InstrItinClass;
+def tc_d5c0729a : InstrItinClass;
+def tc_d63f638c : InstrItinClass;
+def tc_d65dbf51 : InstrItinClass;
+def tc_d773585a : InstrItinClass;
+def tc_d9d43ecb : InstrItinClass;
+def tc_da4a37ed : InstrItinClass;
+def tc_da97ee82 : InstrItinClass;
+def tc_db2bce9c : InstrItinClass;
+def tc_de4df740 : InstrItinClass;
+def tc_de554571 : InstrItinClass;
+def tc_df3319ed : InstrItinClass;
+def tc_e06f432a : InstrItinClass;
+def tc_e4a7f9f0 : InstrItinClass;
+def tc_e4b3cb20 : InstrItinClass;
+def tc_e78647bd : InstrItinClass;
+def tc_e86aa961 : InstrItinClass;
+def tc_e93a3d71 : InstrItinClass;
+def tc_e95795ec : InstrItinClass;
+def tc_e9f3243f : InstrItinClass;
+def tc_f429765c : InstrItinClass;
+def tc_f675fee8 : InstrItinClass;
+def tc_f8e23f0b : InstrItinClass;
+def tc_f9058dd7 : InstrItinClass;
+def tc_fc3999b4 : InstrItinClass;
+def tc_fcc3ddf9 : InstrItinClass;
+def tc_fe211424 : InstrItinClass;
class DepScalarItinV5 {
list<InstrItinData> DepScalarItinV5_list = [
- InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_00e7c26e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_03220ffa, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_038a1342, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_04c9decc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_05b6c987, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0cd51c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0dc560de, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0fc1ae07, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_10b97e27, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1372bca1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_14cd4cfa, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_15411484, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_16d0d8d5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_181af5d0, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1853ea6d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1b82a277, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1b9c9ee5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1d5a38a8, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1e856f58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_234a11a5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_238d91d2, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_29175780, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_2a160009, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_2b2f4060, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_2b6f77c6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2f185f5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2fc0c436, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_351fed2d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3669266a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_367f7f3d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_36c68ad1, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_395dc00f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3bc2c5d3, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3cb8ea06, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3d04548d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3da80ba5, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3e07fb90, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_41d5298e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4403ca65, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_44126683, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_452f85af, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_481e5e5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_49eb22c8, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4ca572d4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_4d9914c9, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_4d99bca9, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4f7cd700, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_513bef45, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_51b866be, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_523fcf30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_5274e61a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_52d7bbea, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bc8a6a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bdb2f6, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_540fdfbc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_55050d58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_57288781, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_594ab548, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_59a01ead, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_5acef64a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_5ba5997d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_5eb851fc, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_5f6847a1, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_60571023, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_609d2efe, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_63fe3df7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_66888ded, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_6792d5ff, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_681a2300, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_68cb12ce, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6aa5711a, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ac37025, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ebb4a12, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6efc556e, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6fa4db47, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_73043bf4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_746baa8e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_74e47fd9, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_7934b9df, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_7a830544, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_7f881c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_84df2cd3, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_855b0b61, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_87735c3b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_897d1a9d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8b15472a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_8fd5f294, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8fe6b782, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_90f3e30c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_976ddc4f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_97743097, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_994333cd, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_999d32db, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_99be14ca, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c00ce8d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c98e8af, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9d5941c7, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_9ef61e5c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9faf76ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_9fdb5406, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a21dc435, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_a27582fa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a46f0df5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a788683e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a8acdac0, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_a904d137, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_adb14c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b13761ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_b166348b, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b44c6e2a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b77c481f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b7dd427e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b9488031, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c0b731, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c4623f, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_bad2bcaf, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_bcc96cee, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_bde7aaf4, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_be706f30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c2f7d806, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c5e2426d, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_c6aa82f7, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ce9b3f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ebf8dd, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c74f796f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c82dc1ff, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_caaebcba, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cd7374a0, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cde8b071, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cf47a43f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cf59f215, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_d088982c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d1090e34, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d24b2d85, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_d580173f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d6bf0472, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_d9709180, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d9f95eef, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_daa058fa, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_dbdffe3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e0739b8c, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_e1e99bfa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e216a5db, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e421e012, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7624c08, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7d02c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e913dc32, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e9c822f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_e9fae2d6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_ef52ed71, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_ef84f62f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f2704b9a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f3eaa14b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f47d212f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f49e76f4, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f7dd9c9f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_f86c328a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f8eeed7a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_fcab4871, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_ff9ee76e, [InstrStage<1, [SLOT0]>]> ];
+ InstrItinData <tc_002cb246, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0371abea, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_05c070ec, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_05d3a09b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0663f615, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_096199d3, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_0a705168, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_0ae0825c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0b2be201, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_0d8f5752, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_13bfbcf9, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_14b272fa, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_14b5c689, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_15aa71c5, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_174516e8, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_17e0d2cd, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_1a2fd869, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1ad90acd, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_1ae57e39, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1b6f7cec, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1c4528a2, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_1c80410a, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1d81e60e, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1fc97744, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_20cdee80, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2332b92e, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_24b66c99, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_25a78932, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_2b8da4c2, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_2eabeebe, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2f7c551d, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2ff964b4, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_30b9bb4a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_32779c6f, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_36153880, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_362c6592, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3962fa26, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_39dfefe8, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3a867367, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_3b470976, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_3b5b7ef9, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3bd75825, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_3c76b0ff, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3d495a39, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_40116ca8, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_434c8e1e, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_4414d8b1, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_44d3da28, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_4560740b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4837eefb, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_49a8207d, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_4ae7b58b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4b68bce4, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_4c5ba658, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4d5fa3a1, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_53559e35, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_56336eb0, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_56f114f4, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_57890846, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5a2711e5, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5abb5e3f, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_5aee39f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_5b54b33f, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5b7c0967, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_5bf126a6, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_5d7f5414, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_5ef37dc4, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_6132ba3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_61830035, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_640086b5, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_643b4717, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_67435e81, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_675e4897, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_679309b8, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_6b25e783, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_703e822c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_7186d325, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_7646c131, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_76851da1, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_779080bf, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_784490da, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_785f65a7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_7a91e76a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_838b34ea, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_85c9c08f, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_85d5d03f, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_862b3e70, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_88b4f13d, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_89e94ad3, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_8b121f4a, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_8b3e402a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_8c945be0, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_8c99de45, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_8d9d0154, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_8fb7ab1b, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9461ff31, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_946df596, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9ad9998f, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_9bfd761f, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9c3ecd83, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9ca930f7, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9da59d12, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9debc299, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9e313203, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9fc3dae0, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a1123dda, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a1c00888, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a58fd5cc, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a5d4aeec, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a6b1eca9, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a813cf9a, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_a9d88b22, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_ae53734a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b31c2e97, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b343892a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b43e7930, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b4407292, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b44ecf75, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b4b5c03a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b51dc29a, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_b83e6d73, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_b857bf4e, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b8bffe55, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b90a29b1, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b9272d6c, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_b9e09e03, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_bab0eed9, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_bafaade3, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_bcf98408, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_bd8382d1, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_bdceeac1, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_be9602ff, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_bf061958, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_bfec0f01, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_c4db48cb, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_c4f596e3, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_c79a189f, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_c8ce0b5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_cd374165, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_cf8126ae, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_cfd8378a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d08ee0f4, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d1aa9eaa, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d2e63d61, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d5b7b0c1, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_d5c0729a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d63f638c, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d65dbf51, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d773585a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d9d43ecb, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_da4a37ed, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_da97ee82, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_db2bce9c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_de4df740, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_de554571, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_df3319ed, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_e06f432a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_e4a7f9f0, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_e4b3cb20, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_e78647bd, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_e86aa961, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_e93a3d71, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_e95795ec, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_e9f3243f, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_f429765c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f675fee8, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f8e23f0b, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_f9058dd7, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_fc3999b4, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_fcc3ddf9, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_fe211424, [InstrStage<1, [SLOT0]>]> ];
}
class DepScalarItinV55 {
list<InstrItinData> DepScalarItinV55_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_05c070ec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0663f615, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b5c689, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_1ad90acd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ InstrItinData <tc_1ae57e39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
-
- InstrItinData <tc_1b9c9ee5, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c4528a2, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_1d81e60e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_20cdee80, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_2332b92e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2ff964b4, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_32779c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_367f7f3d, /*tc_st*/
+ InstrItinData <tc_36153880, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
- []>,
-
- InstrItinData <tc_395dc00f, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 3, 1],
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
- InstrItinData <tc_3d04548d, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 2],
+ InstrItinData <tc_3a867367, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_3b470976, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_3d495a39, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_481e5e5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_434c8e1e, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
+ InstrItinData <tc_4414d8b1, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4560740b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1],
+ InstrItinData <tc_4837eefb, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4ae7b58b, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4d5fa3a1, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_52d7bbea, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_53559e35, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_56336eb0, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_56f114f4, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b54b33f, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b7c0967, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d7f5414, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 3, 2],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ InstrItinData <tc_640086b5, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_643b4717, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_675e4897, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_679309b8, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_76851da1, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_85c9c08f, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_85d5d03f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_88b4f13d, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b121f4a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_8d9d0154, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
+ InstrItinData <tc_9461ff31, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_897d1a9d, /*tc_1*/
+ InstrItinData <tc_946df596, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_9bfd761f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9c3ecd83, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_9da59d12, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e313203, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c00ce8d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a58fd5cc, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9faf76ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_2early*/
+ InstrItinData <tc_ae53734a, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_b31c2e97, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b343892a, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b51dc29a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_b8bffe55, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
+ InstrItinData <tc_b9272d6c, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b9e09e03, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bde7aaf4, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c2f7d806, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c5e2426d, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_bd8382d1, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_caaebcba, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_bfec0f01, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cd374165, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d088982c, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_cfd8378a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ InstrItinData <tc_d08ee0f4, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d1aa9eaa, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d580173f, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5b7b0c1, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d65dbf51, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d9d43ecb, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_2early*/
+ InstrItinData <tc_db2bce9c, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ InstrItinData <tc_de4df740, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_de554571, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_3stall*/
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e06f432a, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e913dc32, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e4a7f9f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9fae2d6, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_e78647bd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e9f3243f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_f429765c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>
];
}
class DepScalarItinV60 {
list<InstrItinData> DepScalarItinV60_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_05c070ec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0663f615, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_1ad90acd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ InstrItinData <tc_1ae57e39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
-
- InstrItinData <tc_1b9c9ee5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c4528a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_1d81e60e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_20cdee80, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_2332b92e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_32779c6f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_367f7f3d, /*tc_st*/
+ InstrItinData <tc_36153880, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [],
[]>,
- InstrItinData <tc_395dc00f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_3d04548d, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_3d495a39, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
+ InstrItinData <tc_40116ca8, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_434c8e1e, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4414d8b1, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ InstrItinData <tc_4560740b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_481e5e5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4ae7b58b, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_53559e35, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_56336eb0, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_56f114f4, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_52d7bbea, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
+ InstrItinData <tc_5b7c0967, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d7f5414, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ InstrItinData <tc_640086b5, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_643b4717, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_679309b8, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_76851da1, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_85c9c08f, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_85d5d03f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b121f4a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
+ InstrItinData <tc_9461ff31, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_897d1a9d, /*tc_1*/
+ InstrItinData <tc_946df596, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9c3ecd83, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e313203, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c00ce8d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9faf76ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_2early*/
+ InstrItinData <tc_ae53734a, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b31c2e97, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ InstrItinData <tc_b51dc29a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
- []>,
+ InstrItinData <tc_b9272d6c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_b9e09e03, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_cd374165, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c2f7d806, /*tc_2*/
+ InstrItinData <tc_cf8126ae, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c5e2426d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_cfd8378a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_d08ee0f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d1aa9eaa, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
+ InstrItinData <tc_d5b7b0c1, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_caaebcba, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d65dbf51, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d9d43ecb, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_db2bce9c, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d088982c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_de4df740, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de554571, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e4a7f9f0, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d580173f, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ InstrItinData <tc_e78647bd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9f3243f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f429765c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>
+ ];
+}
+
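(Reading aid for this hunk, not part of the patch: each InstrItinData record in these generated itinerary lists pairs an itinerary class — the tc_* hash, with its generic category such as tc_1, tc_2, tc_ld, tc_st in the trailing comment — with the issue slot(s) it may occupy, a list of per-operand cycle counts, and a parallel list of forwarding/bypass entries. The sketch below follows the standard LLVM InstrItinData<class, stages, operand cycles, bypasses> convention used throughout this diff; tc_example is a hypothetical name used only for illustration.)

  // Hypothetical record, for illustration only; tc_example does not exist in the file.
  InstrItinData <tc_example, /*tc_2*/            // itinerary class + generic scheduling category
    [InstrStage<1, [SLOT2, SLOT3]>],             // one cycle in either slot 2 or slot 3
    [4, 2, 2],                                   // operand cycles: result ready at 4, sources read at 2
    [Hex_FWD, Hex_FWD, Hex_FWD]>,                // one bypass entry per operand (forwarding path, as in the surrounding entries)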
+class DepScalarItinV62 {
+ list<InstrItinData> DepScalarItinV62_list = [
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_1*/
+ InstrItinData <tc_05c070ec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0663f615, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e913dc32, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_1ad90acd, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9fae2d6, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_1ae57e39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_1c4528a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1d81e60e, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20cdee80, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_2332b92e, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_32779c6f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_newvjump*/
+ InstrItinData <tc_36153880, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
- [Hex_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
-class DepScalarItinV62 {
- list<InstrItinData> DepScalarItinV62_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3bd75825, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_4x*/
+ InstrItinData <tc_3d495a39, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_434c8e1e, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4414d8b1, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4560740b, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_3stall*/
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4ae7b58b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_53559e35, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56336eb0, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56f114f4, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_5b7c0967, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ InstrItinData <tc_5d7f5414, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_1b9c9ee5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_640086b5, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_643b4717, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_679309b8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76851da1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
+ InstrItinData <tc_779080bf, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_85c9c08f, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_2early*/
+ InstrItinData <tc_85d5d03f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_862b3e70, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_367f7f3d, /*tc_st*/
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b121f4a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8c99de45, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9461ff31, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946df596, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
[]>,
- InstrItinData <tc_395dc00f, /*tc_newvjump*/
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c3ecd83, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2],
+ InstrItinData <tc_9e313203, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3d04548d, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae53734a, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_481e5e5c, /*tc_2early*/
+ InstrItinData <tc_b31c2e97, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b51dc29a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_52d7bbea, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_b9272d6c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_b9e09e03, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cd374165, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cfd8378a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_d08ee0f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ InstrItinData <tc_d1aa9eaa, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5b7b0c1, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d65dbf51, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d9d43ecb, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_db2bce9c, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
+ InstrItinData <tc_de4df740, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de554571, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e4a7f9f0, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_e78647bd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_2*/
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9f3243f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f429765c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_897d1a9d, /*tc_2*/
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV65 {
+ list<InstrItinData> DepScalarItinV65_list = [
+ InstrItinData <tc_002cb246, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05c070ec, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0663f615, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2, 2],
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c00ce8d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_1ad90acd, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1ae57e39, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1b6f7cec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_9faf76ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_1c4528a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_1d81e60e, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_2early*/
+ InstrItinData <tc_20cdee80, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_2332b92e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_32779c6f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_36153880, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_3d495a39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c2f7d806, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c5e2426d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_434c8e1e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_4414d8b1, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3x*/
+ InstrItinData <tc_4560740b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4ae7b58b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_caaebcba, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ InstrItinData <tc_53559e35, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_56336eb0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_56f114f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d088982c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_5b7c0967, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d580173f, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d7f5414, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_640086b5, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_1*/
+ InstrItinData <tc_643b4717, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_679309b8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_76851da1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e913dc32, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9fae2d6, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_85c9c08f, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_85d5d03f, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_8b121f4a, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
+ InstrItinData <tc_9461ff31, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_946df596, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_9c3ecd83, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
- [Hex_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
-class DepScalarItinV65 {
- list<InstrItinData> DepScalarItinV65_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e313203, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_ae53734a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b31c2e97, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_1*/
+ InstrItinData <tc_b51dc29a, /*tc_1*/
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_1b9c9ee5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b9272d6c, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b9e09e03, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_cd374165, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_1*/
+ InstrItinData <tc_cfd8378a, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_367f7f3d, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_d08ee0f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
- []>,
+ InstrItinData <tc_d1aa9eaa, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_395dc00f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_d5b7b0c1, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_3d04548d, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
+ InstrItinData <tc_d63f638c, /*tc_ld*/
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ InstrItinData <tc_d65dbf51, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d9d43ecb, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_481e5e5c, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_db2bce9c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de4df740, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
+ InstrItinData <tc_de554571, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e4a7f9f0, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e78647bd, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ InstrItinData <tc_e9f3243f, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_52d7bbea, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_f429765c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV66 {
+ list<InstrItinData> DepScalarItinV66_list = [
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05c070ec, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0663f615, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_latepredldaia*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_1ad90acd, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1ae57e39, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_1b6f7cec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_1c4528a2, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c80410a, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_1d81e60e, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20cdee80, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
+ InstrItinData <tc_2332b92e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
+ InstrItinData <tc_2eabeebe, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_1*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_32779c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_latepredstaia*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ InstrItinData <tc_36153880, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_1*/
+ InstrItinData <tc_3d495a39, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_897d1a9d, /*tc_2*/
+ InstrItinData <tc_434c8e1e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4414d8b1, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
+ InstrItinData <tc_4560740b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_1*/
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4ae7b58b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_53559e35, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56336eb0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56f114f4, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2, 2],
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b7c0967, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_3stall*/
+ InstrItinData <tc_5d7f5414, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6132ba3d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c00ce8d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_640086b5, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_643b4717, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_679309b8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9faf76ae, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76851da1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_85c9c08f, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_85d5d03f, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8b121f4a, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8b3e402a, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
+ InstrItinData <tc_9461ff31, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_946df596, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9c3ecd83, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9e313203, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_a1c00888, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c2f7d806, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c5e2426d, /*tc_3stall*/
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ae53734a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_b31c2e97, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
+ InstrItinData <tc_b51dc29a, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_caaebcba, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_b9272d6c, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
+ InstrItinData <tc_b9e09e03, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d088982c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_latepredstaia*/
- [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d580173f, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2, 2],
+ InstrItinData <tc_cd374165, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ InstrItinData <tc_cfd8378a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d08ee0f4, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ InstrItinData <tc_d1aa9eaa, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d5b7b0c1, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e913dc32, /*tc_3x*/
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d65dbf51, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d773585a, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4],
- [Hex_FWD]>,
-
- InstrItinData <tc_e9fae2d6, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_d9d43ecb, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_1*/
+ InstrItinData <tc_db2bce9c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de4df740, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de554571, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
+ InstrItinData <tc_e4a7f9f0, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_e78647bd, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9f3243f, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f429765c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>
];
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h
index 7e06ccede6e7..81e3971e21d2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h
@@ -1,4 +1,4 @@
-//===- HexagonDepITypes.h -------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,6 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
namespace llvm {
namespace HexagonII {
enum Type {
@@ -45,6 +44,7 @@ enum Type {
TypeCVI_VX = 29,
TypeCVI_VX_DV = 30,
TypeCVI_VX_LATE = 31,
+ TypeCVI_ZW = 32,
TypeDUPLEX = 33,
TypeENDLOOP = 34,
TypeEXTENDER = 35,
@@ -59,7 +59,7 @@ enum Type {
TypeS_2op = 44,
TypeS_3op = 45,
TypeV2LDST = 48,
- TypeV4LDST = 49
+ TypeV4LDST = 49,
};
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td
index 0a385bf938fe..f694062a5232 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td
@@ -1,4 +1,4 @@
-//===- HexagonDepITypes.td ------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,8 +9,7 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
-class IType<bits<6> t> { bits<6> Value = t; }
+class IType<bits<7> t> { bits<7> Value = t; }
def TypeALU32_2op : IType<0>;
def TypeALU32_3op : IType<1>;
def TypeALU32_ADDI : IType<2>;
@@ -43,6 +42,7 @@ def TypeCVI_VS_VX : IType<28>;
def TypeCVI_VX : IType<29>;
def TypeCVI_VX_DV : IType<30>;
def TypeCVI_VX_LATE : IType<31>;
+def TypeCVI_ZW : IType<32>;
def TypeDUPLEX : IType<33>;
def TypeENDLOOP : IType<34>;
def TypeEXTENDER : IType<35>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
index 9f98da3a1dee..ffe212ef9d97 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -1,4 +1,4 @@
-//===- HexagonDepInstrFormats.td ------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,6 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
class Enc_890909 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
@@ -61,14 +60,6 @@ class Enc_27b757 : OpcodeHexagon {
bits <5> Vs32;
let Inst{4-0} = Vs32{4-0};
}
-class Enc_8d04c3 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_1de724 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -87,12 +78,6 @@ class Enc_0e41fa : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_2a736a : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_3d6d37 : OpcodeHexagon {
bits <2> Qs4;
let Inst{6-5} = Qs4{1-0};
@@ -121,14 +106,6 @@ class Enc_802dc0 : OpcodeHexagon {
bits <2> Qv4;
let Inst{23-22} = Qv4{1-0};
}
-class Enc_6a4549 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_6b197f : OpcodeHexagon {
bits <4> Ii;
let Inst{8-5} = Ii{3-0};
@@ -137,22 +114,6 @@ class Enc_6b197f : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_1f3376 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
-}
-class Enc_1f5d8f : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_51436c : OpcodeHexagon {
bits <16> Ii;
let Inst{23-22} = Ii{15-14};
@@ -249,6 +210,14 @@ class Enc_d7dc10 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
+class Enc_6baed4 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
class Enc_736575 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -291,14 +260,6 @@ class Enc_509701 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_c84567 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_830e5d : OpcodeHexagon {
bits <8> Ii;
let Inst{12-5} = Ii{7-0};
@@ -310,12 +271,6 @@ class Enc_830e5d : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_ae0040 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <6> Sd64;
- let Inst{5-0} = Sd64{5-0};
-}
class Enc_79b8c8 : OpcodeHexagon {
bits <6> Ii;
let Inst{6-3} = Ii{5-2};
@@ -336,16 +291,6 @@ class Enc_58a8bf : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_e8ddd5 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_041d7b : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -369,14 +314,6 @@ class Enc_f44229 : OpcodeHexagon {
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
}
-class Enc_fc563d : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_aad80c : OpcodeHexagon {
bits <5> Vuu32;
let Inst{12-8} = Vuu32{4-0};
@@ -434,6 +371,14 @@ class Enc_ee5ed0 : OpcodeHexagon {
bits <2> n1;
let Inst{9-8} = n1{1-0};
}
+class Enc_bddee3 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vyyyy32;
+ let Inst{4-0} = Vyyyy32{4-0};
+ bits <3> Rx8;
+ let Inst{18-16} = Rx8{2-0};
+}
class Enc_935d9b : OpcodeHexagon {
bits <5> Ii;
let Inst{6-3} = Ii{4-1};
@@ -573,6 +518,14 @@ class Enc_27fd0e : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
+class Enc_d7bc34 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vyyyy32;
+ let Inst{4-0} = Vyyyy32{4-0};
+}
class Enc_93af4c : OpcodeHexagon {
bits <7> Ii;
let Inst{10-4} = Ii{6-0};
@@ -620,12 +573,6 @@ class Enc_14640c : OpcodeHexagon {
let Inst{24-22} = n1{3-1};
let Inst{13-13} = n1{0-0};
}
-class Enc_2516bf : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_31db33 : OpcodeHexagon {
bits <2> Qt4;
let Inst{6-5} = Qt4{1-0};
@@ -656,24 +603,6 @@ class Enc_784502 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_9a9d62 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_3a81ac : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_6413b6 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -703,13 +632,13 @@ class Enc_84bff1 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_74aef2 : OpcodeHexagon {
+class Enc_f4413a : OpcodeHexagon {
bits <4> Ii;
let Inst{8-5} = Ii{3-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
@@ -753,16 +682,6 @@ class Enc_e39bb2 : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_7db2f8 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{13-9} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{8-4} = Vv32{4-0};
- bits <4> Vdd16;
- let Inst{3-0} = Vdd16{3-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_1b64fb : OpcodeHexagon {
bits <16> Ii;
let Inst{26-25} = Ii{15-14};
@@ -772,6 +691,16 @@ class Enc_1b64fb : OpcodeHexagon {
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
}
+class Enc_c1d806 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <2> Qe4;
+ let Inst{6-5} = Qe4{1-0};
+}
class Enc_c6220b : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -841,10 +770,6 @@ class Enc_fcf7a7 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
-class Enc_2c3281 : OpcodeHexagon {
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_55355c : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -877,6 +802,16 @@ class Enc_6185fe : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
+class Enc_74aef2 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
class Enc_cd4705 : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
@@ -920,10 +855,6 @@ class Enc_fef969 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_b2ffce : OpcodeHexagon {
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_63eaeb : OpcodeHexagon {
bits <2> Ii;
let Inst{1-0} = Ii{1-0};
@@ -948,12 +879,6 @@ class Enc_372c9d : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_9e9047 : OpcodeHexagon {
- bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
-}
class Enc_4dff07 : OpcodeHexagon {
bits <2> Qv4;
let Inst{12-11} = Qv4{1-0};
@@ -1000,16 +925,6 @@ class Enc_b388cf : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_880793 : OpcodeHexagon {
- bits <3> Qt8;
- let Inst{2-0} = Qt8{2-0};
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_ad1c74 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -1086,14 +1001,6 @@ class Enc_88d4d9 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
}
-class Enc_c0cdde : OpcodeHexagon {
- bits <9> Ii;
- let Inst{13-5} = Ii{8-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
class Enc_226535 : OpcodeHexagon {
bits <8> Ii;
let Inst{12-7} = Ii{7-2};
@@ -1102,14 +1009,6 @@ class Enc_226535 : OpcodeHexagon {
bits <5> Rt32;
let Inst{4-0} = Rt32{4-0};
}
-class Enc_96f0fd : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
- bits <3> Qdd8;
- let Inst{2-0} = Qdd8{2-0};
-}
class Enc_31aa6a : OpcodeHexagon {
bits <5> Ii;
let Inst{6-3} = Ii{4-1};
@@ -1120,12 +1019,6 @@ class Enc_31aa6a : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_932b58 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
-}
class Enc_397f23 : OpcodeHexagon {
bits <8> Ii;
let Inst{13-13} = Ii{7-7};
@@ -1192,14 +1085,6 @@ class Enc_01d3d0 : OpcodeHexagon {
bits <5> Vdd32;
let Inst{4-0} = Vdd32{4-0};
}
-class Enc_3126d7 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_b0e9d8 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -1209,6 +1094,14 @@ class Enc_b0e9d8 : OpcodeHexagon {
bits <5> Rx32;
let Inst{4-0} = Rx32{4-0};
}
+class Enc_1bd127 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vdddd32;
+ let Inst{4-0} = Vdddd32{4-0};
+}
class Enc_3694bd : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -1276,12 +1169,6 @@ class Enc_88c16c : OpcodeHexagon {
bits <5> Rxx32;
let Inst{4-0} = Rxx32{4-0};
}
-class Enc_e7408c : OpcodeHexagon {
- bits <6> Sss64;
- let Inst{21-16} = Sss64{5-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
class Enc_770858 : OpcodeHexagon {
bits <2> Ps4;
let Inst{6-5} = Ps4{1-0};
@@ -1323,15 +1210,14 @@ class Enc_412ff0 : OpcodeHexagon {
bits <5> Rxx32;
let Inst{12-8} = Rxx32{4-0};
}
-class Enc_8e9fbd : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
+class Enc_ef601b : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
}
class Enc_c9a18e : OpcodeHexagon {
bits <11> Ii;
@@ -1356,19 +1242,6 @@ class Enc_e6abcf : OpcodeHexagon {
bits <5> Rtt32;
let Inst{12-8} = Rtt32{4-0};
}
-class Enc_6339d5 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <2> Pv4;
- let Inst{6-5} = Pv4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Ru32;
- let Inst{12-8} = Ru32{4-0};
- bits <5> Rt32;
- let Inst{4-0} = Rt32{4-0};
-}
class Enc_d6990d : OpcodeHexagon {
bits <5> Vuu32;
let Inst{12-8} = Vuu32{4-0};
@@ -1377,16 +1250,6 @@ class Enc_d6990d : OpcodeHexagon {
bits <5> Vxx32;
let Inst{4-0} = Vxx32{4-0};
}
-class Enc_6c4697 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_6c9440 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -1445,15 +1308,13 @@ class Enc_9d1247 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_f4413a : OpcodeHexagon {
- bits <4> Ii;
- let Inst{8-5} = Ii{3-0};
- bits <2> Pt4;
- let Inst{10-9} = Pt4{1-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_7b7ba8 : OpcodeHexagon {
+ bits <2> Qu4;
+ let Inst{9-8} = Qu4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
class Enc_f7430e : OpcodeHexagon {
bits <4> Ii;
@@ -1531,12 +1392,6 @@ class Enc_a803e0 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
}
-class Enc_fde0e3 : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_45364e : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
@@ -1557,12 +1412,6 @@ class Enc_b909d2 : OpcodeHexagon {
let Inst{13-13} = n1{1-1};
let Inst{8-8} = n1{0-0};
}
-class Enc_790d6e : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_e6c957 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -1570,15 +1419,6 @@ class Enc_e6c957 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_fa3ba4 : OpcodeHexagon {
- bits <14> Ii;
- let Inst{26-25} = Ii{13-12};
- let Inst{13-5} = Ii{11-3};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
class Enc_0d8870 : OpcodeHexagon {
bits <12> Ii;
let Inst{26-25} = Ii{11-10};
@@ -1623,14 +1463,6 @@ class Enc_0ed752 : OpcodeHexagon {
bits <5> Cdd32;
let Inst{4-0} = Cdd32{4-0};
}
-class Enc_908985 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_143445 : OpcodeHexagon {
bits <13> Ii;
let Inst{26-25} = Ii{12-11};
@@ -1658,16 +1490,6 @@ class Enc_3e3989 : OpcodeHexagon {
let Inst{25-22} = n1{4-1};
let Inst{8-8} = n1{0-0};
}
-class Enc_12dd8f : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
-}
class Enc_152467 : OpcodeHexagon {
bits <5> Ii;
let Inst{8-5} = Ii{4-1};
@@ -1676,22 +1498,23 @@ class Enc_152467 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_6b1bc4 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <3> Qt8;
- let Inst{10-8} = Qt8{2-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_daea09 : OpcodeHexagon {
- bits <17> Ii;
- let Inst{23-22} = Ii{16-15};
- let Inst{20-16} = Ii{14-10};
- let Inst{13-13} = Ii{9-9};
- let Inst{7-1} = Ii{8-2};
+class Enc_9ac432 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
bits <2> Pu4;
- let Inst{9-8} = Pu4{1-0};
+ let Inst{7-6} = Pu4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_a90628 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
}
class Enc_f37377 : OpcodeHexagon {
bits <8> Ii;
@@ -1712,12 +1535,6 @@ class Enc_a198f6 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_a265b7 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_4e4a80 : OpcodeHexagon {
bits <2> Qs4;
let Inst{6-5} = Qs4{1-0};
@@ -1728,16 +1545,6 @@ class Enc_4e4a80 : OpcodeHexagon {
bits <5> Vvv32;
let Inst{4-0} = Vvv32{4-0};
}
-class Enc_8d5d98 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
-}
class Enc_3dac0b : OpcodeHexagon {
bits <2> Qt4;
let Inst{6-5} = Qt4{1-0};
@@ -1780,16 +1587,6 @@ class Enc_2df31d : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_b0e553 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_25bef0 : OpcodeHexagon {
bits <16> Ii;
let Inst{26-25} = Ii{15-14};
@@ -1905,10 +1702,14 @@ class Enc_bd1cbc : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_d0fe02 : OpcodeHexagon {
- bits <5> Rxx32;
- let Inst{20-16} = Rxx32{4-0};
- bits <0> sgp10;
+class Enc_c85e2a : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
class Enc_a30110 : OpcodeHexagon {
bits <5> Vu32;
@@ -1920,24 +1721,12 @@ class Enc_a30110 : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_f3f408 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
-}
-class Enc_ce4c54 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
+class Enc_33f8ba : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-8} = Ii{7-3};
+ let Inst{4-2} = Ii{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
class Enc_690862 : OpcodeHexagon {
bits <13> Ii;
@@ -1949,20 +1738,6 @@ class Enc_690862 : OpcodeHexagon {
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
}
-class Enc_e570b0 : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_3c46e8 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_2a3787 : OpcodeHexagon {
bits <13> Ii;
let Inst{26-25} = Ii{12-11};
@@ -2010,22 +1785,6 @@ class Enc_729ff7 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_5883d0 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_ff0e49 : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <6> Sdd64;
- let Inst{5-0} = Sdd64{5-0};
-}
class Enc_217147 : OpcodeHexagon {
bits <2> Qv4;
let Inst{23-22} = Qv4{1-0};
@@ -2060,14 +1819,6 @@ class Enc_541f26 : OpcodeHexagon {
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
}
-class Enc_9aae4a : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
- bits <3> Qd8;
- let Inst{2-0} = Qd8{2-0};
-}
class Enc_724154 : OpcodeHexagon {
bits <6> II;
let Inst{5-0} = II{5-0};
@@ -2114,16 +1865,6 @@ class Enc_b84c4c : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_9ac432 : OpcodeHexagon {
- bits <2> Ps4;
- let Inst{17-16} = Ps4{1-0};
- bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
- bits <2> Pu4;
- let Inst{7-6} = Pu4{1-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
class Enc_8203bb : OpcodeHexagon {
bits <6> Ii;
let Inst{12-7} = Ii{5-0};
@@ -2228,12 +1969,6 @@ class Enc_96ce4f : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_2bbae6 : OpcodeHexagon {
- bits <6> Ss64;
- let Inst{21-16} = Ss64{5-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
class Enc_143a3c : OpcodeHexagon {
bits <6> Ii;
let Inst{13-8} = Ii{5-0};
@@ -2281,13 +2016,14 @@ class Enc_de0214 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_a90628 : OpcodeHexagon {
- bits <2> Qv4;
- let Inst{23-22} = Qv4{1-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
+class Enc_daea09 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{23-22} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
}
class Enc_fda92c : OpcodeHexagon {
bits <17> Ii;
@@ -2365,26 +2101,6 @@ class Enc_b43b67 : OpcodeHexagon {
bits <2> Qx4;
let Inst{6-5} = Qx4{1-0};
}
-class Enc_1cd70f : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
-class Enc_3a527f : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_4aca3a : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -2403,12 +2119,6 @@ class Enc_b38ffc : OpcodeHexagon {
bits <4> Rt16;
let Inst{3-0} = Rt16{3-0};
}
-class Enc_5c3a80 : OpcodeHexagon {
- bits <3> Qt8;
- let Inst{10-8} = Qt8{2-0};
- bits <3> Qd8;
- let Inst{5-3} = Qd8{2-0};
-}
class Enc_cda00a : OpcodeHexagon {
bits <12> Ii;
let Inst{19-16} = Ii{11-8};
@@ -2426,24 +2136,6 @@ class Enc_2fbf3c : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_a4ae28 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Qd8;
- let Inst{5-3} = Qd8{2-0};
-}
-class Enc_dd5f9f : OpcodeHexagon {
- bits <3> Qtt8;
- let Inst{2-0} = Qtt8{2-0};
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_70b24b : OpcodeHexagon {
bits <6> Ii;
let Inst{8-5} = Ii{5-2};
@@ -2490,16 +2182,6 @@ class Enc_08d755 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
-class Enc_a7ca29 : OpcodeHexagon {
- bits <3> Qt8;
- let Inst{2-0} = Qt8{2-0};
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_1178da : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
@@ -2518,14 +2200,6 @@ class Enc_8dbe85 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_17a474 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_5a18b3 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -2586,14 +2260,6 @@ class Enc_12b6e9 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_9a895f : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_6f70ca : OpcodeHexagon {
bits <8> Ii;
let Inst{8-4} = Ii{7-3};
@@ -2605,12 +2271,7 @@ class Enc_7222b7 : OpcodeHexagon {
let Inst{1-0} = Qd4{1-0};
}
class Enc_e3b0c4 : OpcodeHexagon {
-}
-class Enc_d7e8ba : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
+
}
class Enc_a255dc : OpcodeHexagon {
bits <3> Ii;
@@ -2628,16 +2289,6 @@ class Enc_cb785b : OpcodeHexagon {
bits <5> Vdd32;
let Inst{4-0} = Vdd32{4-0};
}
-class Enc_5b76ab : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-8} = Ii{8-3};
- let Inst{2-0} = Ii{2-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_cb4b4e : OpcodeHexagon {
bits <2> Pu4;
let Inst{6-5} = Pu4{1-0};
@@ -2648,23 +2299,13 @@ class Enc_cb4b4e : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_fbacc2 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
-}
-class Enc_2ad23d : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
+class Enc_1f5d8f : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
class Enc_9cdba7 : OpcodeHexagon {
bits <8> Ii;
@@ -2683,10 +2324,6 @@ class Enc_5cd7e9 : OpcodeHexagon {
bits <5> Ryy32;
let Inst{4-0} = Ryy32{4-0};
}
-class Enc_e7c9de : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
-}
class Enc_454a26 : OpcodeHexagon {
bits <2> Pt4;
let Inst{9-8} = Pt4{1-0};
@@ -2786,14 +2423,6 @@ class Enc_d2c7f1 : OpcodeHexagon {
bits <2> Pe4;
let Inst{6-5} = Pe4{1-0};
}
-class Enc_dcfcbb : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_3680c2 : OpcodeHexagon {
bits <7> Ii;
let Inst{11-5} = Ii{6-0};
@@ -2822,31 +2451,13 @@ class Enc_e957fb : OpcodeHexagon {
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
}
-class Enc_2146c1 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <3> Qss8;
- let Inst{2-0} = Qss8{2-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
-class Enc_a662ae : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_8f7cc3 : OpcodeHexagon {
- bits <3> Qtt8;
- let Inst{10-8} = Qtt8{2-0};
- bits <3> Qdd8;
- let Inst{5-3} = Qdd8{2-0};
+class Enc_c0cdde : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
class Enc_c9e3bc : OpcodeHexagon {
bits <4> Ii;
@@ -2886,33 +2497,18 @@ class Enc_6f83e7 : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_46f33d : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
-}
-class Enc_c1652e : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
+class Enc_6339d5 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <3> Qd8;
- let Inst{5-3} = Qd8{2-0};
-}
-class Enc_b5b643 : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
-}
-class Enc_85daf5 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
+ let Inst{4-0} = Rt32{4-0};
}
class Enc_d483b9 : OpcodeHexagon {
bits <1> Ii;
@@ -2952,13 +2548,14 @@ class Enc_6c9ee0 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_72a92d : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
+class Enc_fa3ba4 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{26-25} = Ii{13-12};
+ let Inst{13-5} = Ii{11-3};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
class Enc_44661f : OpcodeHexagon {
bits <1> Mu2;
@@ -3006,14 +2603,6 @@ class Enc_da664b : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_7b7ba8 : OpcodeHexagon {
- bits <2> Qu4;
- let Inst{9-8} = Qu4{1-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
-}
class Enc_47ee5e : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -3116,14 +2705,6 @@ class Enc_8e583a : OpcodeHexagon {
let Inst{25-23} = n1{3-1};
let Inst{13-13} = n1{0-0};
}
-class Enc_334c2b : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_b886fd : OpcodeHexagon {
bits <5> Ii;
let Inst{6-3} = Ii{4-1};
@@ -3177,36 +2758,12 @@ class Enc_8dbdfe : OpcodeHexagon {
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
}
-class Enc_7dc746 : OpcodeHexagon {
- bits <3> Quu8;
- let Inst{10-8} = Quu8{2-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <3> Qdd8;
- let Inst{5-3} = Qdd8{2-0};
-}
class Enc_90cd8b : OpcodeHexagon {
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_b8513b : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_b3bac4 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_bd0b33 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -3216,16 +2773,6 @@ class Enc_bd0b33 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
-class Enc_843e80 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <3> Qxx8;
- let Inst{2-0} = Qxx8{2-0};
-}
class Enc_8b8927 : OpcodeHexagon {
bits <5> Rt32;
let Inst{20-16} = Rt32{4-0};
@@ -3359,6 +2906,16 @@ class Enc_e07374 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
+class Enc_e0820b : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
class Enc_323f2d : OpcodeHexagon {
bits <6> II;
let Inst{11-8} = II{5-2};
@@ -3381,16 +2938,6 @@ class Enc_1a9974 : OpcodeHexagon {
bits <5> Rtt32;
let Inst{4-0} = Rtt32{4-0};
}
-class Enc_9ce456 : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-8} = Ii{8-3};
- let Inst{2-0} = Ii{2-0};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_5de85f : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -3416,14 +2963,6 @@ class Enc_0b51ce : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_b5e54d : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
class Enc_b4e6cf : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -3479,16 +3018,6 @@ class Enc_645d54 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_b5d5a7 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
-}
class Enc_667b39 : OpcodeHexagon {
bits <5> Css32;
let Inst{20-16} = Css32{4-0};
@@ -3511,6 +3040,14 @@ class Enc_163a3c : OpcodeHexagon {
bits <5> Rt32;
let Inst{4-0} = Rt32{4-0};
}
+class Enc_a75aa6 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+}
class Enc_b087ac : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
@@ -3519,6 +3056,14 @@ class Enc_b087ac : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
+class Enc_691712 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
class Enc_b1e1fb : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -3546,16 +3091,6 @@ class Enc_b8c967 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_f106e0 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{8-4} = Vv32{4-0};
- bits <5> Vt32;
- let Inst{13-9} = Vt32{4-0};
- bits <4> Vdd16;
- let Inst{3-0} = Vdd16{3-0};
-}
class Enc_fb6577 : OpcodeHexagon {
bits <2> Pu4;
let Inst{9-8} = Pu4{1-0};
@@ -3564,20 +3099,6 @@ class Enc_fb6577 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_37c406 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <4> Vdd16;
- let Inst{7-4} = Vdd16{3-0};
-}
-class Enc_403871 : OpcodeHexagon {
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_2bae10 : OpcodeHexagon {
bits <4> Ii;
let Inst{10-8} = Ii{3-1};
@@ -3586,22 +3107,6 @@ class Enc_2bae10 : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_f3adb6 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_aac08c : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
-}
class Enc_c4dc92 : OpcodeHexagon {
bits <2> Qv4;
let Inst{23-22} = Qv4{1-0};
@@ -3743,12 +3248,14 @@ class Enc_134437 : OpcodeHexagon {
bits <2> Qd4;
let Inst{1-0} = Qd4{1-0};
}
-class Enc_33f8ba : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-8} = Ii{7-3};
- let Inst{4-2} = Ii{2-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_f3f408 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
class Enc_97d666 : OpcodeHexagon {
bits <4> Rs16;
@@ -3766,16 +3273,6 @@ class Enc_f82eaf : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_57e245 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
-}
class Enc_69d63b : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -3842,24 +3339,6 @@ class Enc_7eaeb6 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_274a4c : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
-}
-class Enc_aceeef : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_f55a0c : OpcodeHexagon {
bits <6> Ii;
let Inst{11-8} = Ii{5-2};
@@ -3898,16 +3377,6 @@ class Enc_7b523d : OpcodeHexagon {
bits <5> Vxx32;
let Inst{4-0} = Vxx32{4-0};
}
-class Enc_c39a8b : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
-}
class Enc_47ef61 : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
@@ -4006,6 +3475,14 @@ class Enc_a6ce9c : OpcodeHexagon {
bits <4> Rs16;
let Inst{7-4} = Rs16{3-0};
}
+class Enc_3b7631 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vdddd32;
+ let Inst{4-0} = Vdddd32{4-0};
+ bits <3> Rx8;
+ let Inst{18-16} = Rx8{2-0};
+}
class Enc_eca7c8 : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -4017,16 +3494,6 @@ class Enc_eca7c8 : OpcodeHexagon {
bits <5> Rt32;
let Inst{4-0} = Rt32{4-0};
}
-class Enc_598f6c : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
-}
-class Enc_41dcc3 : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_4b39e4 : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index b6824fa33106..3ef1c49eb7ee 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -1,4 +1,4 @@
-//===- HexagonDepInstrInfo.td ---------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,12 +9,11 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
def A2_abs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = abs($Rs32)",
-tc_c2f7d806, TypeS_2op>, Enc_5e2823 {
+tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -25,7 +24,7 @@ def A2_absp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = abs($Rss32)",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000100;
let prefersSlot3 = 1;
@@ -34,7 +33,7 @@ def A2_abssat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = abs($Rs32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_5e2823 {
+tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -46,7 +45,7 @@ def A2_add : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011000;
@@ -62,7 +61,7 @@ def A2_addh_h16_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -74,7 +73,7 @@ def A2_addh_h16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -86,7 +85,7 @@ def A2_addh_h16_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -98,7 +97,7 @@ def A2_addh_h16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -110,7 +109,7 @@ def A2_addh_h16_sat_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -123,7 +122,7 @@ def A2_addh_h16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -136,7 +135,7 @@ def A2_addh_h16_sat_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -149,7 +148,7 @@ def A2_addh_h16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -162,7 +161,7 @@ def A2_addh_l16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -174,7 +173,7 @@ def A2_addh_l16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -186,7 +185,7 @@ def A2_addh_l16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -199,7 +198,7 @@ def A2_addh_l16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -212,7 +211,7 @@ def A2_addi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = add($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel {
let Inst{31-28} = 0b1011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -231,7 +230,7 @@ def A2_addp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -242,7 +241,7 @@ def A2_addpsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -254,7 +253,7 @@ def A2_addsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be {
+tc_61830035, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110010;
@@ -269,14 +268,14 @@ def A2_addsp : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rs32,$Rtt32)",
-tc_897d1a9d, TypeALU64> {
+tc_679309b8, TypeALU64> {
let isPseudo = 1;
}
def A2_addsph : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):raw:hi",
-tc_897d1a9d, TypeALU64>, Enc_a56825 {
+tc_679309b8, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -286,7 +285,7 @@ def A2_addspl : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):raw:lo",
-tc_897d1a9d, TypeALU64>, Enc_a56825 {
+tc_679309b8, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -296,7 +295,7 @@ def A2_and : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = and($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001000;
@@ -312,7 +311,7 @@ def A2_andir : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = and($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_140c83, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_140c83, ImmRegRel {
let Inst{31-22} = 0b0111011000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -328,7 +327,7 @@ def A2_andp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = and($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -338,7 +337,7 @@ def A2_aslh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = aslh($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000000;
let hasNewValue = 1;
@@ -350,7 +349,7 @@ def A2_asrh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = asrh($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000001;
let hasNewValue = 1;
@@ -362,7 +361,7 @@ def A2_combine_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.h,$Rs32.h)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011100;
@@ -374,7 +373,7 @@ def A2_combine_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.h,$Rs32.l)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011101;
@@ -386,7 +385,7 @@ def A2_combine_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.l,$Rs32.h)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011110;
@@ -398,7 +397,7 @@ def A2_combine_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.l,$Rs32.l)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011111;
@@ -410,7 +409,7 @@ def A2_combineii : HInst<
(outs DoubleRegs:$Rdd32),
(ins s32_0Imm:$Ii, s8_0Imm:$II),
"$Rdd32 = combine(#$Ii,#$II)",
-tc_b9488031, TypeALU32_2op>, Enc_18c338 {
+tc_5a2711e5, TypeALU32_2op>, Enc_18c338 {
let Inst{31-23} = 0b011111000;
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
@@ -425,7 +424,7 @@ def A2_combinew : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = combine($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_be32a5, PredNewRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_be32a5, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110101000;
@@ -437,7 +436,7 @@ def A2_max : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = max($Rs32,$Rt32)",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101110;
@@ -449,7 +448,7 @@ def A2_maxp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = max($Rss32,$Rtt32)",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -459,7 +458,7 @@ def A2_maxu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = maxu($Rs32,$Rt32)",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101110;
@@ -471,7 +470,7 @@ def A2_maxup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = maxu($Rss32,$Rtt32)",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -481,7 +480,7 @@ def A2_min : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = min($Rt32,$Rs32)",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101101;
@@ -493,7 +492,7 @@ def A2_minp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = min($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -503,7 +502,7 @@ def A2_minu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = minu($Rt32,$Rs32)",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101101;
@@ -515,7 +514,7 @@ def A2_minup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = minu($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -525,7 +524,7 @@ def A2_neg : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = neg($Rs32)",
-tc_68cb12ce, TypeALU32_2op> {
+tc_57890846, TypeALU32_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -535,7 +534,7 @@ def A2_negp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = neg($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000100;
}
@@ -543,7 +542,7 @@ def A2_negsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = neg($Rs32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_5e2823 {
+tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -555,7 +554,7 @@ def A2_nop : HInst<
(outs),
(ins),
"nop",
-tc_6efc556e, TypeALU32_2op>, Enc_e3b0c4 {
+tc_2eabeebe, TypeALU32_2op>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b0111111100000000;
}
@@ -563,7 +562,7 @@ def A2_not : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = not($Rs32)",
-tc_68cb12ce, TypeALU32_2op> {
+tc_57890846, TypeALU32_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -573,7 +572,7 @@ def A2_notp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = not($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000100;
}
@@ -581,7 +580,7 @@ def A2_or : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = or($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001001;
@@ -597,7 +596,7 @@ def A2_orir : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = or($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_140c83, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_140c83, ImmRegRel {
let Inst{31-22} = 0b0111011010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -613,7 +612,7 @@ def A2_orp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = or($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -623,7 +622,7 @@ def A2_paddf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = add($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011000;
@@ -639,7 +638,7 @@ def A2_paddfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = add($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011000;
@@ -656,7 +655,7 @@ def A2_paddif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if (!$Pu4) $Rd32 = add($Rs32,#$Ii)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011101001;
let isPredicated = 1;
@@ -676,7 +675,7 @@ def A2_paddifnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if (!$Pu4.new) $Rd32 = add($Rs32,#$Ii)",
-tc_2b2f4060, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-23} = 0b011101001;
let isPredicated = 1;
@@ -697,7 +696,7 @@ def A2_paddit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if ($Pu4) $Rd32 = add($Rs32,#$Ii)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011101000;
let isPredicated = 1;
@@ -716,7 +715,7 @@ def A2_padditnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if ($Pu4.new) $Rd32 = add($Rs32,#$Ii)",
-tc_2b2f4060, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-23} = 0b011101000;
let isPredicated = 1;
@@ -736,7 +735,7 @@ def A2_paddt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = add($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011000;
@@ -751,7 +750,7 @@ def A2_paddtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = add($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011000;
@@ -767,7 +766,7 @@ def A2_pandf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = and($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001000;
@@ -781,7 +780,7 @@ def A2_pandfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = and($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001000;
@@ -796,7 +795,7 @@ def A2_pandt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = and($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001000;
@@ -809,7 +808,7 @@ def A2_pandtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = and($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001000;
@@ -823,7 +822,7 @@ def A2_porf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = or($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001001;
@@ -837,7 +836,7 @@ def A2_porfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = or($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001001;
@@ -852,7 +851,7 @@ def A2_port : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = or($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001001;
@@ -865,7 +864,7 @@ def A2_portnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = or($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001001;
@@ -879,7 +878,7 @@ def A2_psubf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sub($Rt32,$Rs32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011001;
@@ -893,7 +892,7 @@ def A2_psubfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011001;
@@ -908,7 +907,7 @@ def A2_psubt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sub($Rt32,$Rs32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011001;
@@ -921,7 +920,7 @@ def A2_psubtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011001;
@@ -935,7 +934,7 @@ def A2_pxorf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = xor($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001011;
@@ -949,7 +948,7 @@ def A2_pxorfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001011;
@@ -964,7 +963,7 @@ def A2_pxort : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = xor($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001011;
@@ -977,7 +976,7 @@ def A2_pxortnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001011;
@@ -991,7 +990,7 @@ def A2_roundsat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = round($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_cf8126ae, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000110;
let hasNewValue = 1;
@@ -1003,7 +1002,7 @@ def A2_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = sat($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000110;
let hasNewValue = 1;
@@ -1014,7 +1013,7 @@ def A2_satb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satb($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1025,7 +1024,7 @@ def A2_sath : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sath($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1036,7 +1035,7 @@ def A2_satub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satub($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1047,7 +1046,7 @@ def A2_satuh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satuh($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1058,7 +1057,7 @@ def A2_sub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011001;
@@ -1073,7 +1072,7 @@ def A2_subh_h16_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1085,7 +1084,7 @@ def A2_subh_h16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1097,7 +1096,7 @@ def A2_subh_h16_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1109,7 +1108,7 @@ def A2_subh_h16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1121,7 +1120,7 @@ def A2_subh_h16_sat_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1134,7 +1133,7 @@ def A2_subh_h16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1147,7 +1146,7 @@ def A2_subh_h16_sat_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1160,7 +1159,7 @@ def A2_subh_h16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1173,7 +1172,7 @@ def A2_subh_l16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1185,7 +1184,7 @@ def A2_subh_l16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1197,7 +1196,7 @@ def A2_subh_l16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1210,7 +1209,7 @@ def A2_subh_l16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1223,7 +1222,7 @@ def A2_subp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = sub($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -1232,7 +1231,7 @@ def A2_subri : HInst<
(outs IntRegs:$Rd32),
(ins s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = sub(#$Ii,$Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel {
let Inst{31-22} = 0b0111011001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -1248,7 +1247,7 @@ def A2_subsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 {
+tc_61830035, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110110;
@@ -1262,7 +1261,7 @@ def A2_svaddh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vaddh($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110000;
@@ -1275,7 +1274,7 @@ def A2_svaddhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vaddh($Rs32,$Rt32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be {
+tc_61830035, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110001;
@@ -1290,7 +1289,7 @@ def A2_svadduhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vadduh($Rs32,$Rt32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be {
+tc_61830035, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110011;
@@ -1305,12 +1304,13 @@ def A2_svavgh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vavgh($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be {
+tc_1c80410a, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111000;
let hasNewValue = 1;
let opNewValue = 0;
+let prefersSlot3 = 1;
let InputType = "reg";
let isCommutable = 1;
}
@@ -1318,12 +1318,13 @@ def A2_svavghs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vavgh($Rs32,$Rt32):rnd",
-tc_8fe6b782, TypeALU32_3op>, Enc_5ab2be {
+tc_d08ee0f4, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111001;
let hasNewValue = 1;
let opNewValue = 0;
+let prefersSlot3 = 1;
let InputType = "reg";
let isCommutable = 1;
}
@@ -1331,19 +1332,20 @@ def A2_svnavgh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vnavgh($Rt32,$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_1c80410a, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111011;
let hasNewValue = 1;
let opNewValue = 0;
+let prefersSlot3 = 1;
let InputType = "reg";
}
def A2_svsubh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubh($Rt32,$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110100;
@@ -1355,7 +1357,7 @@ def A2_svsubhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubh($Rt32,$Rs32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 {
+tc_61830035, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110101;
@@ -1369,7 +1371,7 @@ def A2_svsubuhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubuh($Rt32,$Rs32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 {
+tc_61830035, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110111;
@@ -1383,7 +1385,7 @@ def A2_swiz : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = swiz($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -1393,7 +1395,7 @@ def A2_sxtb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sxtb($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000101;
let hasNewValue = 1;
@@ -1405,7 +1407,7 @@ def A2_sxth : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sxth($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000111;
let hasNewValue = 1;
@@ -1417,7 +1419,7 @@ def A2_sxtw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = sxtw($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100010;
}
@@ -1425,7 +1427,7 @@ def A2_tfr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = $Rs32",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000011;
let hasNewValue = 1;
@@ -1438,7 +1440,7 @@ def A2_tfrcrr : HInst<
(outs IntRegs:$Rd32),
(ins CtrRegs:$Cs32),
"$Rd32 = $Cs32",
-tc_29175780, TypeCR>, Enc_0cb018 {
+tc_b9272d6c, TypeCR>, Enc_0cb018 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101010000;
let hasNewValue = 1;
@@ -1448,7 +1450,7 @@ def A2_tfrf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = $Rs32",
-tc_d6bf0472, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
@@ -1463,7 +1465,7 @@ def A2_tfrfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = $Rs32",
-tc_2b2f4060, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
@@ -1479,7 +1481,7 @@ def A2_tfrih : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
"$Rx32.h = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_51436c {
+tc_5a2711e5, TypeALU32_2op>, Enc_51436c {
let Inst{21-21} = 0b1;
let Inst{31-24} = 0b01110010;
let hasNewValue = 1;
@@ -1490,7 +1492,7 @@ def A2_tfril : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
"$Rx32.l = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_51436c {
+tc_5a2711e5, TypeALU32_2op>, Enc_51436c {
let Inst{21-21} = 0b1;
let Inst{31-24} = 0b01110001;
let hasNewValue = 1;
@@ -1501,7 +1503,7 @@ def A2_tfrp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = $Rss32",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let BaseOpcode = "A2_tfrp";
let isPredicable = 1;
let isPseudo = 1;
@@ -1510,7 +1512,7 @@ def A2_tfrpf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if (!$Pu4) $Rdd32 = $Rss32",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let BaseOpcode = "A2_tfrp";
@@ -1520,7 +1522,7 @@ def A2_tfrpfnew : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if (!$Pu4.new) $Rdd32 = $Rss32",
-tc_5f6847a1, TypeALU32_2op>, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let isPredicatedNew = 1;
@@ -1531,7 +1533,7 @@ def A2_tfrpi : HInst<
(outs DoubleRegs:$Rdd32),
(ins s8_0Imm:$Ii),
"$Rdd32 = #$Ii",
-tc_b9488031, TypeALU64> {
+tc_5a2711e5, TypeALU64> {
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
let isMoveImm = 1;
@@ -1541,7 +1543,7 @@ def A2_tfrpt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if ($Pu4) $Rdd32 = $Rss32",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let BaseOpcode = "A2_tfrp";
let isPseudo = 1;
@@ -1550,7 +1552,7 @@ def A2_tfrptnew : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if ($Pu4.new) $Rdd32 = $Rss32",
-tc_5f6847a1, TypeALU32_2op>, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedNew = 1;
let BaseOpcode = "A2_tfrp";
@@ -1560,7 +1562,7 @@ def A2_tfrrcr : HInst<
(outs CtrRegs:$Cd32),
(ins IntRegs:$Rs32),
"$Cd32 = $Rs32",
-tc_a21dc435, TypeCR>, Enc_bd811a {
+tc_434c8e1e, TypeCR>, Enc_bd811a {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100010001;
let hasNewValue = 1;
@@ -1570,7 +1572,7 @@ def A2_tfrsi : HInst<
(outs IntRegs:$Rd32),
(ins s32_0Imm:$Ii),
"$Rd32 = #$Ii",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel {
+tc_57890846, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel {
let Inst{21-21} = 0b0;
let Inst{31-24} = 0b01111000;
let hasNewValue = 1;
@@ -1592,7 +1594,7 @@ def A2_tfrt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = $Rs32",
-tc_d6bf0472, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -1606,7 +1608,7 @@ def A2_tfrtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = $Rs32",
-tc_2b2f4060, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -1621,7 +1623,7 @@ def A2_vabsh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsh($Rss32)",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1630,7 +1632,7 @@ def A2_vabshsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsh($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1640,7 +1642,7 @@ def A2_vabsw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsw($Rss32)",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1649,7 +1651,7 @@ def A2_vabswsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsw($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1659,7 +1661,7 @@ def A2_vaddb_map : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddb($Rss32,$Rtt32)",
-tc_540fdfbc, TypeMAPPING> {
+tc_946df596, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -1667,7 +1669,7 @@ def A2_vaddh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1676,7 +1678,7 @@ def A2_vaddhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1687,7 +1689,7 @@ def A2_vaddub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddub($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1696,7 +1698,7 @@ def A2_vaddubs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddub($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1707,7 +1709,7 @@ def A2_vadduhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vadduh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1718,7 +1720,7 @@ def A2_vaddw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddw($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1727,7 +1729,7 @@ def A2_vaddws : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddw($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1738,16 +1740,17 @@ def A2_vavgh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavghcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32):crnd",
-tc_2b6f77c6, TypeALU64>, Enc_a56825 {
+tc_002cb246, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
@@ -1757,79 +1760,87 @@ def A2_vavghr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavgub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgub($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavgubr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgub($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavguh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavguhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguh($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavguw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguw($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vavguwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguw($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vavgw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vavgwcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32):crnd",
-tc_2b6f77c6, TypeALU64>, Enc_a56825 {
+tc_002cb246, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -1839,16 +1850,17 @@ def A2_vavgwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vcmpbeq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b110000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1857,7 +1869,7 @@ def A2_vcmpbgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b111000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1866,7 +1878,7 @@ def A2_vcmpheq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1875,7 +1887,7 @@ def A2_vcmphgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1884,7 +1896,7 @@ def A2_vcmphgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1893,7 +1905,7 @@ def A2_vcmpweq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1902,7 +1914,7 @@ def A2_vcmpwgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1911,7 +1923,7 @@ def A2_vcmpwgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1920,7 +1932,7 @@ def A2_vconj : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vconj($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000100;
let prefersSlot3 = 1;
@@ -1930,7 +1942,7 @@ def A2_vmaxb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxb($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1940,7 +1952,7 @@ def A2_vmaxh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1950,7 +1962,7 @@ def A2_vmaxub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxub($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1960,7 +1972,7 @@ def A2_vmaxuh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxuh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1970,7 +1982,7 @@ def A2_vmaxuw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxuw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -1980,7 +1992,7 @@ def A2_vmaxw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1990,7 +2002,7 @@ def A2_vminb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminb($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -2000,7 +2012,7 @@ def A2_vminh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2010,7 +2022,7 @@ def A2_vminub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminub($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2020,7 +2032,7 @@ def A2_vminuh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminuh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2030,7 +2042,7 @@ def A2_vminuw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminuw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2040,7 +2052,7 @@ def A2_vminw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2050,16 +2062,17 @@ def A2_vnavgh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_6132ba3d, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
}
def A2_vnavghcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32):crnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2070,7 +2083,7 @@ def A2_vnavghr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32):rnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2081,16 +2094,17 @@ def A2_vnavgw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_6132ba3d, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
}
def A2_vnavgwcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32):crnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2101,7 +2115,7 @@ def A2_vnavgwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32):rnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2112,7 +2126,7 @@ def A2_vraddub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vraddub($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -2122,7 +2136,7 @@ def A2_vraddub_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vraddub($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -2133,7 +2147,7 @@ def A2_vrsadub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrsadub($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -2143,7 +2157,7 @@ def A2_vrsadub_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrsadub($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -2154,7 +2168,7 @@ def A2_vsubb_map : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vsubb($Rss32,$Rtt32)",
-tc_540fdfbc, TypeMAPPING> {
+tc_946df596, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -2162,7 +2176,7 @@ def A2_vsubh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubh($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2171,7 +2185,7 @@ def A2_vsubhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubh($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2182,7 +2196,7 @@ def A2_vsubub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubub($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2191,7 +2205,7 @@ def A2_vsububs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubub($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2202,7 +2216,7 @@ def A2_vsubuhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubuh($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2213,7 +2227,7 @@ def A2_vsubw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubw($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2222,7 +2236,7 @@ def A2_vsubws : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubw($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2233,7 +2247,7 @@ def A2_xor : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = xor($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001011;
@@ -2248,7 +2262,7 @@ def A2_xorp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = xor($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2258,7 +2272,7 @@ def A2_zxtb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = zxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let hasNewValue = 1;
let opNewValue = 0;
let BaseOpcode = "A2_zxtb";
@@ -2270,7 +2284,7 @@ def A2_zxth : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = zxth($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000110;
let hasNewValue = 1;
@@ -2282,7 +2296,7 @@ def A4_addp_c : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
"$Rdd32 = add($Rss32,$Rtt32,$Px4):carry",
-tc_523fcf30, TypeS_3op>, Enc_2b3f60 {
+tc_9c3ecd83, TypeS_3op>, Enc_2b3f60 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010110;
@@ -2293,7 +2307,7 @@ def A4_andn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = and($Rt32,~$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001100;
@@ -2305,7 +2319,7 @@ def A4_andnp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = and($Rtt32,~$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2314,7 +2328,7 @@ def A4_bitsplit : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = bitsplit($Rs32,$Rt32)",
-tc_1b9c9ee5, TypeALU64>, Enc_be32a5 {
+tc_4414d8b1, TypeALU64>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010100001;
@@ -2324,7 +2338,7 @@ def A4_bitspliti : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rdd32 = bitsplit($Rs32,#$Ii)",
-tc_1b9c9ee5, TypeS_2op>, Enc_311abd {
+tc_4414d8b1, TypeS_2op>, Enc_311abd {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001000110;
@@ -2334,14 +2348,14 @@ def A4_boundscheck : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rs32,$Rtt32)",
-tc_1e856f58, TypeALU64> {
+tc_85d5d03f, TypeALU64> {
let isPseudo = 1;
}
def A4_boundscheck_hi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rss32,$Rtt32):raw:hi",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -2350,7 +2364,7 @@ def A4_boundscheck_lo : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rss32,$Rtt32):raw:lo",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -2359,7 +2373,7 @@ def A4_cmpbeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b110000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2372,7 +2386,7 @@ def A4_cmpbeqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Pd4 = cmpb.eq($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101000;
@@ -2385,7 +2399,7 @@ def A4_cmpbgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2397,7 +2411,7 @@ def A4_cmpbgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s8_0Imm:$Ii),
"$Pd4 = cmpb.gt($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101001;
@@ -2409,7 +2423,7 @@ def A4_cmpbgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.gtu($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b111000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2421,7 +2435,7 @@ def A4_cmpbgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmpb.gtu($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_02553a, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_02553a, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011101010;
@@ -2438,7 +2452,7 @@ def A4_cmpheq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2451,7 +2465,7 @@ def A4_cmpheqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmph.eq($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101000;
@@ -2469,7 +2483,7 @@ def A4_cmphgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2481,7 +2495,7 @@ def A4_cmphgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmph.gt($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101001;
@@ -2498,7 +2512,7 @@ def A4_cmphgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.gtu($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2510,7 +2524,7 @@ def A4_cmphgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmph.gtu($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_02553a, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_02553a, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011101010;
@@ -2527,7 +2541,7 @@ def A4_combineii : HInst<
(outs DoubleRegs:$Rdd32),
(ins s8_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = combine(#$Ii,#$II)",
-tc_b9488031, TypeALU32_2op>, Enc_f0cca7 {
+tc_5a2711e5, TypeALU32_2op>, Enc_f0cca7 {
let Inst{31-21} = 0b01111100100;
let isExtendable = 1;
let opExtendable = 2;
@@ -2539,7 +2553,7 @@ def A4_combineir : HInst<
(outs DoubleRegs:$Rdd32),
(ins s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rdd32 = combine(#$Ii,$Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_9cdba7 {
+tc_5a2711e5, TypeALU32_2op>, Enc_9cdba7 {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011001;
let isExtendable = 1;
@@ -2552,7 +2566,7 @@ def A4_combineri : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rdd32 = combine($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_9cdba7 {
+tc_5a2711e5, TypeALU32_2op>, Enc_9cdba7 {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011000;
let isExtendable = 1;
@@ -2565,7 +2579,7 @@ def A4_cround_ri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = cround($Rs32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -2577,7 +2591,7 @@ def A4_cround_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cround($Rs32,$Rt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_5ab2be {
+tc_002cb246, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -2589,14 +2603,14 @@ def A4_ext : HInst<
(outs),
(ins u26_6Imm:$Ii),
"immext(#$Ii)",
-tc_452f85af, TypeEXTENDER>, Enc_2b518f {
+tc_862b3e70, TypeEXTENDER>, Enc_2b518f {
let Inst{31-28} = 0b0000;
}
def A4_modwrapu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = modwrap($Rs32,$Rt32)",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2608,7 +2622,7 @@ def A4_orn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = or($Rt32,~$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001101;
@@ -2620,7 +2634,7 @@ def A4_ornp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = or($Rtt32,~$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2629,7 +2643,7 @@ def A4_paslhf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = aslh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000000;
@@ -2643,7 +2657,7 @@ def A4_paslhfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = aslh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000000;
@@ -2658,7 +2672,7 @@ def A4_paslht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = aslh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000000;
@@ -2671,7 +2685,7 @@ def A4_paslhtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = aslh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000000;
@@ -2685,7 +2699,7 @@ def A4_pasrhf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = asrh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000001;
@@ -2699,7 +2713,7 @@ def A4_pasrhfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = asrh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000001;
@@ -2714,7 +2728,7 @@ def A4_pasrht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = asrh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000001;
@@ -2727,7 +2741,7 @@ def A4_pasrhtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = asrh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000001;
@@ -2741,7 +2755,7 @@ def A4_psxtbf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000101;
@@ -2755,7 +2769,7 @@ def A4_psxtbfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000101;
@@ -2770,7 +2784,7 @@ def A4_psxtbt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000101;
@@ -2783,7 +2797,7 @@ def A4_psxtbtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000101;
@@ -2797,7 +2811,7 @@ def A4_psxthf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000111;
@@ -2811,7 +2825,7 @@ def A4_psxthfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000111;
@@ -2826,7 +2840,7 @@ def A4_psxtht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000111;
@@ -2839,7 +2853,7 @@ def A4_psxthtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000111;
@@ -2853,7 +2867,7 @@ def A4_pzxtbf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = zxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000100;
@@ -2867,7 +2881,7 @@ def A4_pzxtbfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = zxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000100;
@@ -2882,7 +2896,7 @@ def A4_pzxtbt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = zxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000100;
@@ -2895,7 +2909,7 @@ def A4_pzxtbtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = zxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000100;
@@ -2909,7 +2923,7 @@ def A4_pzxthf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = zxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000110;
@@ -2923,7 +2937,7 @@ def A4_pzxthfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = zxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000110;
@@ -2938,7 +2952,7 @@ def A4_pzxtht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = zxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000110;
@@ -2951,7 +2965,7 @@ def A4_pzxthtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = zxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000110;
@@ -2965,7 +2979,7 @@ def A4_rcmpeq : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmp.eq($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011010;
@@ -2979,7 +2993,7 @@ def A4_rcmpeqi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = cmp.eq($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011010;
let hasNewValue = 1;
@@ -2996,7 +3010,7 @@ def A4_rcmpneq : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = !cmp.eq($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011011;
@@ -3010,7 +3024,7 @@ def A4_rcmpneqi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = !cmp.eq($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011011;
let hasNewValue = 1;
@@ -3027,7 +3041,7 @@ def A4_round_ri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = round($Rs32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -3039,7 +3053,7 @@ def A4_round_ri_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = round($Rs32,#$Ii):sat",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -3052,7 +3066,7 @@ def A4_round_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = round($Rs32,$Rt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_5ab2be {
+tc_002cb246, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -3064,7 +3078,7 @@ def A4_round_rr_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = round($Rs32,$Rt32):sat",
-tc_2b6f77c6, TypeS_3op>, Enc_5ab2be {
+tc_002cb246, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -3077,7 +3091,7 @@ def A4_subp_c : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
"$Rdd32 = sub($Rss32,$Rtt32,$Px4):carry",
-tc_523fcf30, TypeS_3op>, Enc_2b3f60 {
+tc_9c3ecd83, TypeS_3op>, Enc_2b3f60 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010111;
@@ -3088,7 +3102,7 @@ def A4_tfrcpp : HInst<
(outs DoubleRegs:$Rdd32),
(ins CtrRegs64:$Css32),
"$Rdd32 = $Css32",
-tc_29175780, TypeCR>, Enc_667b39 {
+tc_b9272d6c, TypeCR>, Enc_667b39 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101000000;
}
@@ -3096,7 +3110,7 @@ def A4_tfrpcp : HInst<
(outs CtrRegs64:$Cdd32),
(ins DoubleRegs:$Rss32),
"$Cdd32 = $Rss32",
-tc_a21dc435, TypeCR>, Enc_0ed752 {
+tc_434c8e1e, TypeCR>, Enc_0ed752 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100011001;
}
@@ -3104,7 +3118,7 @@ def A4_tlbmatch : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Pd4 = tlbmatch($Rss32,$Rt32)",
-tc_04c9decc, TypeALU64>, Enc_03833b {
+tc_4837eefb, TypeALU64>, Enc_03833b {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3114,7 +3128,7 @@ def A4_vcmpbeq_any : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = any8(vcmpb.eq($Rss32,$Rtt32))",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3123,7 +3137,7 @@ def A4_vcmpbeqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u8_0Imm:$Ii),
"$Pd4 = vcmpb.eq($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3132,7 +3146,7 @@ def A4_vcmpbgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3141,7 +3155,7 @@ def A4_vcmpbgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpb.gt($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3150,7 +3164,7 @@ def A4_vcmpbgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmpb.gtu($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_3680c2 {
+tc_643b4717, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3159,7 +3173,7 @@ def A4_vcmpheqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmph.eq($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3168,7 +3182,7 @@ def A4_vcmphgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmph.gt($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3177,7 +3191,7 @@ def A4_vcmphgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmph.gtu($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_3680c2 {
+tc_643b4717, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3186,7 +3200,7 @@ def A4_vcmpweqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpw.eq($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3195,7 +3209,7 @@ def A4_vcmpwgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpw.gt($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3204,7 +3218,7 @@ def A4_vcmpwgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmpw.gtu($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_3680c2 {
+tc_643b4717, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b100;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3213,7 +3227,7 @@ def A4_vrmaxh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3224,7 +3238,7 @@ def A4_vrmaxuh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxuh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3235,7 +3249,7 @@ def A4_vrmaxuw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxuw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3246,7 +3260,7 @@ def A4_vrmaxw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3257,7 +3271,7 @@ def A4_vrminh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3268,7 +3282,7 @@ def A4_vrminuh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminuh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3279,7 +3293,7 @@ def A4_vrminuw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminuw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3290,7 +3304,7 @@ def A4_vrminw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3301,7 +3315,7 @@ def A5_ACS : HInst<
(outs DoubleRegs:$Rxx32, PredRegs:$Pe4),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32,$Pe4 = vacsh($Rss32,$Rtt32)",
-tc_caaebcba, TypeM>, Enc_831a7d, Requires<[HasV55]> {
+tc_d1aa9eaa, TypeM>, Enc_831a7d, Requires<[HasV55]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -3314,7 +3328,7 @@ def A5_vaddhubs : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vaddhub($Rss32,$Rtt32):sat",
-tc_2b6f77c6, TypeS_3op>, Enc_d2216a, Requires<[HasV5]> {
+tc_002cb246, TypeS_3op>, Enc_d2216a {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -3327,7 +3341,7 @@ def A6_vcmpbeq_notany : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = !any8(vcmpb.eq($Rss32,$Rtt32))",
-tc_55050d58, TypeALU64>, Enc_fcf7a7, Requires<[HasV65]> {
+tc_1fc97744, TypeALU64>, Enc_fcf7a7, Requires<[HasV65]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3336,7 +3350,7 @@ def A6_vminub_RdP : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Pe4),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32,$Pe4 = vminub($Rtt32,$Rss32)",
-tc_ef84f62f, TypeM>, Enc_d2c7f1, Requires<[HasV62]> {
+tc_f9058dd7, TypeM>, Enc_d2c7f1, Requires<[HasV62]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -3347,7 +3361,7 @@ def C2_all8 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = all8($Ps4)",
-tc_f2704b9a, TypeCR>, Enc_65d691 {
+tc_de554571, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011101000;
}
@@ -3355,7 +3369,7 @@ def C2_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = and($Pt4,$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011000000;
@@ -3364,7 +3378,7 @@ def C2_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = and($Pt4,!$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011011000;
@@ -3373,7 +3387,7 @@ def C2_any8 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = any8($Ps4)",
-tc_f2704b9a, TypeCR>, Enc_65d691 {
+tc_de554571, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011100000;
}
@@ -3381,7 +3395,7 @@ def C2_bitsclr : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = bitsclr($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111100;
@@ -3390,7 +3404,7 @@ def C2_bitsclri : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u6_0Imm:$Ii),
"$Pd4 = bitsclr($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_5d6c34 {
+tc_643b4717, TypeS_2op>, Enc_5d6c34 {
let Inst{7-2} = 0b000000;
let Inst{31-21} = 0b10000101100;
}
@@ -3398,7 +3412,7 @@ def C2_bitsset : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = bitsset($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111010;
@@ -3407,7 +3421,7 @@ def C2_ccombinewf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rdd32 = combine($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111101000;
@@ -3419,7 +3433,7 @@ def C2_ccombinewnewf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111101000;
@@ -3432,7 +3446,7 @@ def C2_ccombinewnewt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111101000;
@@ -3444,7 +3458,7 @@ def C2_ccombinewt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rdd32 = combine($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111101000;
@@ -3455,7 +3469,7 @@ def C2_cmoveif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if (!$Pu4) $Rd32 = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111101;
@@ -3477,7 +3491,7 @@ def C2_cmoveit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if ($Pu4) $Rd32 = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111100;
@@ -3498,7 +3512,7 @@ def C2_cmovenewif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if (!$Pu4.new) $Rd32 = #$Ii",
-tc_5f6847a1, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111101;
@@ -3521,7 +3535,7 @@ def C2_cmovenewit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if ($Pu4.new) $Rd32 = #$Ii",
-tc_5f6847a1, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111100;
@@ -3543,7 +3557,7 @@ def C2_cmpeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.eq($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010000;
@@ -3556,7 +3570,7 @@ def C2_cmpeqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmp.eq($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-22} = 0b0111010100;
let CextOpcode = "C2_cmpeq";
@@ -3572,7 +3586,7 @@ def C2_cmpeqp : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3583,7 +3597,7 @@ def C2_cmpgei : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s8_0Imm:$Ii),
"$Pd4 = cmp.ge($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op> {
+tc_56f114f4, TypeALU32_2op> {
let isCompare = 1;
let isPseudo = 1;
}
@@ -3591,7 +3605,7 @@ def C2_cmpgeui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Pd4 = cmp.geu($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op> {
+tc_56f114f4, TypeALU32_2op> {
let isCompare = 1;
let isPseudo = 1;
}
@@ -3599,7 +3613,7 @@ def C2_cmpgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.gt($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010010;
@@ -3611,7 +3625,7 @@ def C2_cmpgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmp.gt($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-22} = 0b0111010101;
let CextOpcode = "C2_cmpgt";
@@ -3627,7 +3641,7 @@ def C2_cmpgtp : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3637,7 +3651,7 @@ def C2_cmpgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.gtu($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010011;
@@ -3649,7 +3663,7 @@ def C2_cmpgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmp.gtu($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-21} = 0b01110101100;
let CextOpcode = "C2_cmpgtu";
@@ -3665,7 +3679,7 @@ def C2_cmpgtup : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3675,7 +3689,7 @@ def C2_cmplt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.lt($Rs32,$Rt32)",
-tc_6ebb4a12, TypeALU32_3op> {
+tc_56f114f4, TypeALU32_3op> {
let isCompare = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -3684,7 +3698,7 @@ def C2_cmpltu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.ltu($Rs32,$Rt32)",
-tc_6ebb4a12, TypeALU32_3op> {
+tc_56f114f4, TypeALU32_3op> {
let isCompare = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -3693,7 +3707,7 @@ def C2_mask : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4),
"$Rdd32 = mask($Pt4)",
-tc_cde8b071, TypeS_2op>, Enc_78e566 {
+tc_0ae0825c, TypeS_2op>, Enc_78e566 {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0000;
let Inst{31-16} = 0b1000011000000000;
@@ -3702,7 +3716,7 @@ def C2_mux : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mux($Pu4,$Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54 {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110100000;
@@ -3714,7 +3728,7 @@ def C2_muxii : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii, s8_0Imm:$II),
"$Rd32 = mux($Pu4,#$Ii,#$II)",
-tc_d6bf0472, TypeALU32_2op>, Enc_830e5d {
+tc_4c5ba658, TypeALU32_2op>, Enc_830e5d {
let Inst{31-25} = 0b0111101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -3728,7 +3742,7 @@ def C2_muxir : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = mux($Pu4,$Rs32,#$Ii)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011100110;
let hasNewValue = 1;
@@ -3744,7 +3758,7 @@ def C2_muxri : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = mux($Pu4,#$Ii,$Rs32)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011100111;
let hasNewValue = 1;
@@ -3760,7 +3774,7 @@ def C2_not : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = not($Ps4)",
-tc_f2704b9a, TypeCR>, Enc_65d691 {
+tc_de554571, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011110000;
}
@@ -3768,7 +3782,7 @@ def C2_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = or($Pt4,$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011001000;
@@ -3777,7 +3791,7 @@ def C2_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = or($Pt4,!$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011111000;
@@ -3786,7 +3800,7 @@ def C2_pxfer_map : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = $Ps4",
-tc_53bc8a6a, TypeMAPPING> {
+tc_640086b5, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -3794,7 +3808,7 @@ def C2_tfrpr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Ps4),
"$Rd32 = $Ps4",
-tc_cde8b071, TypeS_2op>, Enc_f5e933 {
+tc_0ae0825c, TypeS_2op>, Enc_f5e933 {
let Inst{13-5} = 0b000000000;
let Inst{31-18} = 0b10001001010000;
let hasNewValue = 1;
@@ -3804,7 +3818,7 @@ def C2_tfrrp : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32),
"$Pd4 = $Rs32",
-tc_351fed2d, TypeS_2op>, Enc_48b75f {
+tc_cfd8378a, TypeS_2op>, Enc_48b75f {
let Inst{13-2} = 0b000000000000;
let Inst{31-21} = 0b10000101010;
}
@@ -3812,7 +3826,7 @@ def C2_vitpack : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Rd32 = vitpack($Ps4,$Pt4)",
-tc_1b9c9ee5, TypeS_2op>, Enc_527412 {
+tc_4414d8b1, TypeS_2op>, Enc_527412 {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b10001001000000;
@@ -3824,7 +3838,7 @@ def C2_vmux : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmux($Pu4,$Rss32,$Rtt32)",
-tc_f8eeed7a, TypeALU64>, Enc_329361 {
+tc_b4b5c03a, TypeALU64>, Enc_329361 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010001000;
@@ -3833,7 +3847,7 @@ def C2_xor : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = xor($Ps4,$Pt4)",
-tc_53bc8a6a, TypeCR>, Enc_284ebb {
+tc_640086b5, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011010000;
@@ -3842,7 +3856,7 @@ def C4_addipc : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = add(pc,#$Ii)",
-tc_b9c4623f, TypeCR>, Enc_607661 {
+tc_a813cf9a, TypeCR>, Enc_607661 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0110101001001001;
@@ -3858,7 +3872,7 @@ def C4_and_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,and($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011000100;
@@ -3867,7 +3881,7 @@ def C4_and_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,and($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011100100;
@@ -3876,7 +3890,7 @@ def C4_and_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,or($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011001100;
@@ -3885,7 +3899,7 @@ def C4_and_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,or($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011101100;
@@ -3894,7 +3908,7 @@ def C4_cmplte : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.gt($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010010;
@@ -3906,7 +3920,7 @@ def C4_cmpltei : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = !cmp.gt($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-22} = 0b0111010101;
let CextOpcode = "C4_cmplte";
@@ -3922,7 +3936,7 @@ def C4_cmplteu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.gtu($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010011;
@@ -3934,7 +3948,7 @@ def C4_cmplteui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = !cmp.gtu($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-21} = 0b01110101100;
let CextOpcode = "C4_cmplteu";
@@ -3950,7 +3964,7 @@ def C4_cmpneq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.eq($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010000;
@@ -3963,7 +3977,7 @@ def C4_cmpneqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = !cmp.eq($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-22} = 0b0111010100;
let CextOpcode = "C4_cmpneq";
@@ -3979,7 +3993,7 @@ def C4_fastcorner9 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = fastcorner9($Ps4,$Pt4)",
-tc_53bc8a6a, TypeCR>, Enc_284ebb {
+tc_640086b5, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b100100;
let Inst{13-10} = 0b1000;
let Inst{31-18} = 0b01101011000000;
@@ -3988,7 +4002,7 @@ def C4_fastcorner9_not : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = !fastcorner9($Ps4,$Pt4)",
-tc_53bc8a6a, TypeCR>, Enc_284ebb {
+tc_640086b5, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b100100;
let Inst{13-10} = 0b1000;
let Inst{31-18} = 0b01101011000100;
@@ -3997,7 +4011,7 @@ def C4_nbitsclr : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !bitsclr($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111101;
@@ -4006,7 +4020,7 @@ def C4_nbitsclri : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u6_0Imm:$Ii),
"$Pd4 = !bitsclr($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_5d6c34 {
+tc_643b4717, TypeS_2op>, Enc_5d6c34 {
let Inst{7-2} = 0b000000;
let Inst{31-21} = 0b10000101101;
}
@@ -4014,7 +4028,7 @@ def C4_nbitsset : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !bitsset($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111011;
@@ -4023,7 +4037,7 @@ def C4_or_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,and($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011010100;
@@ -4032,7 +4046,7 @@ def C4_or_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,and($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011110100;
@@ -4041,7 +4055,7 @@ def C4_or_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,or($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011011100;
@@ -4050,7 +4064,7 @@ def C4_or_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,or($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011111100;
@@ -4059,7 +4073,7 @@ def F2_conv_d2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_d2df($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4069,7 +4083,7 @@ def F2_conv_d2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_d2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -4081,7 +4095,7 @@ def F2_conv_df2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4091,7 +4105,7 @@ def F2_conv_df2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4101,7 +4115,7 @@ def F2_conv_df2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -4113,7 +4127,7 @@ def F2_conv_df2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4123,7 +4137,7 @@ def F2_conv_df2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4133,7 +4147,7 @@ def F2_conv_df2uw : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -4145,7 +4159,7 @@ def F2_conv_df2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000101;
let hasNewValue = 1;
@@ -4157,7 +4171,7 @@ def F2_conv_df2w : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -4169,7 +4183,7 @@ def F2_conv_df2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -4181,7 +4195,7 @@ def F2_conv_sf2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4191,7 +4205,7 @@ def F2_conv_sf2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4201,7 +4215,7 @@ def F2_conv_sf2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4211,7 +4225,7 @@ def F2_conv_sf2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4221,7 +4235,7 @@ def F2_conv_sf2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4231,7 +4245,7 @@ def F2_conv_sf2uw : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@@ -4243,7 +4257,7 @@ def F2_conv_sf2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@@ -4255,7 +4269,7 @@ def F2_conv_sf2w : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@@ -4267,7 +4281,7 @@ def F2_conv_sf2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@@ -4279,7 +4293,7 @@ def F2_conv_ud2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_ud2df($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4289,7 +4303,7 @@ def F2_conv_ud2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_ud2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000001;
let hasNewValue = 1;
@@ -4301,7 +4315,7 @@ def F2_conv_uw2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_uw2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4311,7 +4325,7 @@ def F2_conv_uw2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_uw2sf($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011001;
let hasNewValue = 1;
@@ -4323,7 +4337,7 @@ def F2_conv_w2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_w2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4333,7 +4347,7 @@ def F2_conv_w2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_w2sf($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011010;
let hasNewValue = 1;
@@ -4341,11 +4355,22 @@ let opNewValue = 0;
let isFP = 1;
let Uses = [USR];
}
+def F2_dfadd : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = dfadd($Rss32,$Rtt32)",
+tc_2f7c551d, TypeM>, Enc_a56825, Requires<[HasV66]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let isFP = 1;
+let Uses = [USR];
+}
def F2_dfclass : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Pd4 = dfclass($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_1f19b5, Requires<[HasV5]> {
+tc_643b4717, TypeALU64>, Enc_1f19b5 {
let Inst{4-2} = 0b100;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b11011100100;
@@ -4356,7 +4381,7 @@ def F2_dfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4368,7 +4393,7 @@ def F2_dfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.ge($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4380,7 +4405,7 @@ def F2_dfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4392,7 +4417,7 @@ def F2_dfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.uo($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4404,7 +4429,7 @@ def F2_dfimm_n : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):neg",
-tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_e6c957 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100101;
let prefersSlot3 = 1;
@@ -4413,16 +4438,27 @@ def F2_dfimm_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):pos",
-tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_e6c957 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100100;
let prefersSlot3 = 1;
}
+def F2_dfsub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = dfsub($Rss32,$Rtt32)",
+tc_2f7c551d, TypeM>, Enc_a56825, Requires<[HasV66]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let isFP = 1;
+let Uses = [USR];
+}
def F2_sfadd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfadd($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@@ -4436,7 +4472,7 @@ def F2_sfclass : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = sfclass($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64, Requires<[HasV5]> {
+tc_643b4717, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101111;
@@ -4447,7 +4483,7 @@ def F2_sfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4459,7 +4495,7 @@ def F2_sfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.ge($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4471,7 +4507,7 @@ def F2_sfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4483,7 +4519,7 @@ def F2_sfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.uo($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4495,7 +4531,7 @@ def F2_sffixupd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupd($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@@ -4507,7 +4543,7 @@ def F2_sffixupn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupn($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@@ -4519,7 +4555,7 @@ def F2_sffixupr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sffixupr($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011101;
let hasNewValue = 1;
@@ -4530,7 +4566,7 @@ def F2_sffma : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32)",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4544,7 +4580,7 @@ def F2_sffma_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32):lib",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4558,7 +4594,7 @@ def F2_sffma_sc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4),
"$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale",
-tc_038a1342, TypeM>, Enc_437f33, Requires<[HasV5]> {
+tc_4560740b, TypeM>, Enc_437f33 {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -4572,7 +4608,7 @@ def F2_sffms : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32)",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4586,7 +4622,7 @@ def F2_sffms_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32):lib",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4600,7 +4636,7 @@ def F2_sfimm_n : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):neg",
-tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_6c9440 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011001;
let hasNewValue = 1;
@@ -4611,7 +4647,7 @@ def F2_sfimm_p : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):pos",
-tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_6c9440 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011000;
let hasNewValue = 1;
@@ -4622,7 +4658,7 @@ def F2_sfinvsqrta : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32),
"$Rd32,$Pe4 = sfinvsqrta($Rs32)",
-tc_4d99bca9, TypeS_2op>, Enc_890909, Requires<[HasV5]> {
+tc_b8bffe55, TypeS_2op>, Enc_890909 {
let Inst{13-7} = 0b0000000;
let Inst{31-21} = 0b10001011111;
let hasNewValue = 1;
@@ -4634,7 +4670,7 @@ def F2_sfmax : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmax($Rs32,$Rt32)",
-tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_88b4f13d, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@@ -4648,7 +4684,7 @@ def F2_sfmin : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmin($Rs32,$Rt32)",
-tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_88b4f13d, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@@ -4662,7 +4698,7 @@ def F2_sfmpy : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmpy($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011010;
@@ -4676,7 +4712,7 @@ def F2_sfrecipa : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)",
-tc_9c00ce8d, TypeM>, Enc_a94f3b, Requires<[HasV5]> {
+tc_2ff964b4, TypeM>, Enc_a94f3b {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011111;
@@ -4689,7 +4725,7 @@ def F2_sfsub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfsub($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@@ -4702,7 +4738,7 @@ def G4_tfrgcpp : HInst<
(outs DoubleRegs:$Rdd32),
(ins GuestRegs64:$Gss32),
"$Rdd32 = $Gss32",
-tc_6fa4db47, TypeCR>, Enc_0aa344 {
+tc_0d8f5752, TypeCR>, Enc_0aa344 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101000001;
}
@@ -4710,7 +4746,7 @@ def G4_tfrgcrr : HInst<
(outs IntRegs:$Rd32),
(ins GuestRegs:$Gs32),
"$Rd32 = $Gs32",
-tc_6fa4db47, TypeCR>, Enc_44271f {
+tc_0d8f5752, TypeCR>, Enc_44271f {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101010001;
let hasNewValue = 1;
@@ -4720,7 +4756,7 @@ def G4_tfrgpcp : HInst<
(outs GuestRegs64:$Gdd32),
(ins DoubleRegs:$Rss32),
"$Gdd32 = $Rss32",
-tc_994333cd, TypeCR>, Enc_ed5027 {
+tc_bcf98408, TypeCR>, Enc_ed5027 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100011000;
let hasNewValue = 1;
@@ -4730,7 +4766,7 @@ def G4_tfrgrcr : HInst<
(outs GuestRegs:$Gd32),
(ins IntRegs:$Rs32),
"$Gd32 = $Rs32",
-tc_994333cd, TypeCR>, Enc_621fba {
+tc_bcf98408, TypeCR>, Enc_621fba {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100010000;
let hasNewValue = 1;
@@ -4740,7 +4776,7 @@ def J2_call : HInst<
(outs),
(ins a30_2Imm:$Ii),
"call $Ii",
-tc_a27582fa, TypeJ>, Enc_81ac1d, PredRel {
+tc_4ae7b58b, TypeJ>, Enc_81ac1d, PredRel {
let Inst{0-0} = 0b0;
let Inst{31-25} = 0b0101101;
let isCall = 1;
@@ -4762,7 +4798,7 @@ def J2_callf : HInst<
(outs),
(ins PredRegs:$Pu4, a30_2Imm:$Ii),
"if (!$Pu4) call $Ii",
-tc_2f185f5c, TypeJ>, Enc_daea09, PredRel {
+tc_1d81e60e, TypeJ>, Enc_daea09, PredRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b1;
@@ -4789,7 +4825,7 @@ def J2_callr : HInst<
(outs),
(ins IntRegs:$Rs32),
"callr $Rs32",
-tc_15411484, TypeJ>, Enc_ecbcc8 {
+tc_3bd75825, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010000101;
let isCall = 1;
@@ -4803,7 +4839,7 @@ def J2_callrf : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) callr $Rs32",
-tc_10b97e27, TypeJ>, Enc_88d4d9 {
+tc_1ad90acd, TypeJ>, Enc_88d4d9 {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010001001;
@@ -4821,7 +4857,7 @@ def J2_callrt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) callr $Rs32",
-tc_10b97e27, TypeJ>, Enc_88d4d9 {
+tc_1ad90acd, TypeJ>, Enc_88d4d9 {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010001000;
@@ -4838,7 +4874,7 @@ def J2_callt : HInst<
(outs),
(ins PredRegs:$Pu4, a30_2Imm:$Ii),
"if ($Pu4) call $Ii",
-tc_2f185f5c, TypeJ>, Enc_daea09, PredRel {
+tc_1d81e60e, TypeJ>, Enc_daea09, PredRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b0;
@@ -4864,7 +4900,7 @@ def J2_endloop0 : HInst<
(outs),
(ins),
"endloop0",
-tc_52d7bbea, TypeJ> {
+tc_1b6f7cec, TypeJ> {
let Uses = [LC0, SA0];
let Defs = [LC0, P3, PC, USR];
let isBranch = 1;
@@ -4875,7 +4911,7 @@ def J2_endloop01 : HInst<
(outs),
(ins),
"endloop01",
-tc_52d7bbea, TypeJ> {
+tc_1b6f7cec, TypeJ> {
let Uses = [LC0, LC1, SA0, SA1];
let Defs = [LC0, LC1, P3, PC, USR];
let isPseudo = 1;
@@ -4884,7 +4920,7 @@ def J2_endloop1 : HInst<
(outs),
(ins),
"endloop1",
-tc_52d7bbea, TypeJ> {
+tc_1b6f7cec, TypeJ> {
let Uses = [LC1, SA1];
let Defs = [LC1, PC];
let isBranch = 1;
@@ -4895,7 +4931,7 @@ def J2_jump : HInst<
(outs),
(ins b30_2Imm:$Ii),
"jump $Ii",
-tc_3669266a, TypeJ>, Enc_81ac1d, PredNewRel {
+tc_ae53734a, TypeJ>, Enc_81ac1d, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{31-25} = 0b0101100;
let isTerminator = 1;
@@ -4917,7 +4953,7 @@ def J2_jumpf : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4) jump:nt $Ii",
-tc_e9fae2d6, TypeJ>, Enc_daea09, PredNewRel {
+tc_db2bce9c, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b1;
@@ -4943,7 +4979,7 @@ def J2_jumpf_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, b15_2Imm:$Ii),
"if (!$Pu4) jump $Ii",
-tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60]> {
+tc_db2bce9c, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -4951,7 +4987,7 @@ def J2_jumpfnew : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4.new) jump:nt $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b010;
let Inst{21-21} = 0b1;
@@ -4978,7 +5014,7 @@ def J2_jumpfnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4.new) jump:t $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b110;
let Inst{21-21} = 0b1;
@@ -5005,7 +5041,7 @@ def J2_jumpfpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4) jump:t $Ii",
-tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
+tc_cd374165, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b100;
let Inst{21-21} = 0b1;
@@ -5031,7 +5067,7 @@ def J2_jumpr : HInst<
(outs),
(ins IntRegs:$Rs32),
"jumpr $Rs32",
-tc_9faf76ae, TypeJ>, Enc_ecbcc8, PredNewRel {
+tc_d5b7b0c1, TypeJ>, Enc_ecbcc8, PredNewRel {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010010100;
let isTerminator = 1;
@@ -5048,7 +5084,7 @@ def J2_jumprf : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr:nt $Rs32",
-tc_e0739b8c, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_85c9c08f, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010011011;
@@ -5067,7 +5103,7 @@ def J2_jumprf_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr $Rs32",
-tc_e0739b8c, TypeMAPPING>, Requires<[HasV60]> {
+tc_85c9c08f, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5075,7 +5111,7 @@ def J2_jumprfnew : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) jumpr:nt $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b01010011011;
@@ -5095,7 +5131,7 @@ def J2_jumprfnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) jumpr:t $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b01010011011;
@@ -5115,7 +5151,7 @@ def J2_jumprfpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr:t $Rs32",
-tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
+tc_e78647bd, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b01010011011;
@@ -5134,7 +5170,7 @@ def J2_jumprgtez : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32>=#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000101;
@@ -5152,7 +5188,7 @@ def J2_jumprgtezpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32>=#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000101;
@@ -5170,7 +5206,7 @@ def J2_jumprltez : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32<=#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000111;
@@ -5188,7 +5224,7 @@ def J2_jumprltezpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32<=#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000111;
@@ -5206,7 +5242,7 @@ def J2_jumprnz : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32==#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000110;
@@ -5224,7 +5260,7 @@ def J2_jumprnzpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32==#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000110;
@@ -5242,7 +5278,7 @@ def J2_jumprt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr:nt $Rs32",
-tc_e0739b8c, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_85c9c08f, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010011010;
@@ -5260,7 +5296,7 @@ def J2_jumprt_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr $Rs32",
-tc_e0739b8c, TypeMAPPING>, Requires<[HasV60]> {
+tc_85c9c08f, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5268,7 +5304,7 @@ def J2_jumprtnew : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) jumpr:nt $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b01010011010;
@@ -5287,7 +5323,7 @@ def J2_jumprtnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) jumpr:t $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b01010011010;
@@ -5306,7 +5342,7 @@ def J2_jumprtpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr:t $Rs32",
-tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
+tc_e78647bd, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b01010011010;
@@ -5324,7 +5360,7 @@ def J2_jumprz : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32!=#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000100;
@@ -5342,7 +5378,7 @@ def J2_jumprzpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32!=#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000100;
@@ -5360,7 +5396,7 @@ def J2_jumpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4) jump:nt $Ii",
-tc_e9fae2d6, TypeJ>, Enc_daea09, PredNewRel {
+tc_db2bce9c, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b0;
@@ -5385,7 +5421,7 @@ def J2_jumpt_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, b15_2Imm:$Ii),
"if ($Pu4) jump $Ii",
-tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60]> {
+tc_db2bce9c, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5393,7 +5429,7 @@ def J2_jumptnew : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4.new) jump:nt $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b010;
let Inst{21-21} = 0b0;
@@ -5419,7 +5455,7 @@ def J2_jumptnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4.new) jump:t $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b110;
let Inst{21-21} = 0b0;
@@ -5445,7 +5481,7 @@ def J2_jumptpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4) jump:t $Ii",
-tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
+tc_cd374165, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b100;
let Inst{21-21} = 0b0;
@@ -5470,7 +5506,7 @@ def J2_loop0i : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"loop0($Ii,#$II)",
-tc_cf59f215, TypeCR>, Enc_4dc228 {
+tc_a9d88b22, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001000;
@@ -5487,7 +5523,7 @@ def J2_loop0r : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"loop0($Ii,$Rs32)",
-tc_7934b9df, TypeCR>, Enc_864a5a {
+tc_df3319ed, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5505,7 +5541,7 @@ def J2_loop1i : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"loop1($Ii,#$II)",
-tc_cf59f215, TypeCR>, Enc_4dc228 {
+tc_a9d88b22, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001001;
@@ -5522,7 +5558,7 @@ def J2_loop1r : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"loop1($Ii,$Rs32)",
-tc_7934b9df, TypeCR>, Enc_864a5a {
+tc_df3319ed, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5540,7 +5576,7 @@ def J2_pause : HInst<
(outs),
(ins u8_0Imm:$Ii),
"pause(#$Ii)",
-tc_681a2300, TypeJ>, Enc_a51a9a {
+tc_8d9d0154, TypeJ>, Enc_a51a9a {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5551,7 +5587,7 @@ def J2_ploop1si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp1loop0($Ii,#$II)",
-tc_c5e2426d, TypeCR>, Enc_4dc228 {
+tc_1c4528a2, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001101;
@@ -5569,7 +5605,7 @@ def J2_ploop1sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp1loop0($Ii,$Rs32)",
-tc_4f7cd700, TypeCR>, Enc_864a5a {
+tc_32779c6f, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5588,7 +5624,7 @@ def J2_ploop2si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp2loop0($Ii,#$II)",
-tc_c5e2426d, TypeCR>, Enc_4dc228 {
+tc_1c4528a2, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001110;
@@ -5606,7 +5642,7 @@ def J2_ploop2sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp2loop0($Ii,$Rs32)",
-tc_4f7cd700, TypeCR>, Enc_864a5a {
+tc_32779c6f, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5625,7 +5661,7 @@ def J2_ploop3si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp3loop0($Ii,#$II)",
-tc_c5e2426d, TypeCR>, Enc_4dc228 {
+tc_1c4528a2, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001111;
@@ -5643,7 +5679,7 @@ def J2_ploop3sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp3loop0($Ii,$Rs32)",
-tc_4f7cd700, TypeCR>, Enc_864a5a {
+tc_32779c6f, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5662,18 +5698,19 @@ def J2_trap0 : HInst<
(outs),
(ins u8_0Imm:$Ii),
"trap0(#$Ii)",
-tc_14cd4cfa, TypeJ>, Enc_a51a9a {
+tc_fc3999b4, TypeJ>, Enc_a51a9a {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0101010000000000;
let isSolo = 1;
+let hasSideEffects = 1;
}
def J2_trap1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u8_0Imm:$Ii),
"trap1($Rx32,#$Ii)",
-tc_59a01ead, TypeJ>, Enc_33f8ba {
+tc_b9e09e03, TypeJ>, Enc_33f8ba {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5683,13 +5720,15 @@ let opNewValue = 0;
let isSolo = 1;
let Uses = [GOSP];
let Defs = [GOSP, PC];
+let hasSideEffects = 1;
let Constraints = "$Rx32 = $Rx32in";
}
def J2_trap1_noregmap : HInst<
(outs),
(ins u8_0Imm:$Ii),
"trap1(#$Ii)",
-tc_59a01ead, TypeMAPPING> {
+tc_b9e09e03, TypeMAPPING> {
+let hasSideEffects = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5697,7 +5736,7 @@ def J4_cmpeq_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -5723,7 +5762,7 @@ def J4_cmpeq_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -5749,7 +5788,7 @@ def J4_cmpeq_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010001;
@@ -5775,7 +5814,7 @@ def J4_cmpeq_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010001;
@@ -5801,7 +5840,7 @@ def J4_cmpeq_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010001;
@@ -5827,7 +5866,7 @@ def J4_cmpeq_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010001;
@@ -5853,7 +5892,7 @@ def J4_cmpeq_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -5878,7 +5917,7 @@ def J4_cmpeq_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -5903,7 +5942,7 @@ def J4_cmpeq_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010000;
@@ -5928,7 +5967,7 @@ def J4_cmpeq_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010000;
@@ -5953,7 +5992,7 @@ def J4_cmpeq_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010000;
@@ -5978,7 +6017,7 @@ def J4_cmpeq_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010000;
@@ -6003,7 +6042,7 @@ def J4_cmpeqi_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6029,7 +6068,7 @@ def J4_cmpeqi_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6055,7 +6094,7 @@ def J4_cmpeqi_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000001;
@@ -6081,7 +6120,7 @@ def J4_cmpeqi_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000001;
@@ -6107,7 +6146,7 @@ def J4_cmpeqi_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001001;
@@ -6133,7 +6172,7 @@ def J4_cmpeqi_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001001;
@@ -6159,7 +6198,7 @@ def J4_cmpeqi_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6184,7 +6223,7 @@ def J4_cmpeqi_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6209,7 +6248,7 @@ def J4_cmpeqi_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000000;
@@ -6234,7 +6273,7 @@ def J4_cmpeqi_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000000;
@@ -6259,7 +6298,7 @@ def J4_cmpeqi_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001000;
@@ -6284,7 +6323,7 @@ def J4_cmpeqi_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001000;
@@ -6309,7 +6348,7 @@ def J4_cmpeqn1_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_e90a15, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_e90a15, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -6335,7 +6374,7 @@ def J4_cmpeqn1_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_5a18b3, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_5a18b3, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -6361,7 +6400,7 @@ def J4_cmpeqn1_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_1de724, PredRel {
+tc_3d495a39, TypeCJ>, Enc_1de724, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001000111;
@@ -6387,7 +6426,7 @@ def J4_cmpeqn1_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14640c, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14640c, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001000111;
@@ -6413,7 +6452,7 @@ def J4_cmpeqn1_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_668704, PredRel {
+tc_3d495a39, TypeCJ>, Enc_668704, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001001111;
@@ -6439,7 +6478,7 @@ def J4_cmpeqn1_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_800e04, PredRel {
+tc_3d495a39, TypeCJ>, Enc_800e04, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001001111;
@@ -6465,7 +6504,7 @@ def J4_cmpeqn1_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_4aca3a, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_4aca3a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -6490,7 +6529,7 @@ def J4_cmpeqn1_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_f7ea77, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_f7ea77, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -6515,7 +6554,7 @@ def J4_cmpeqn1_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_405228, PredRel {
+tc_3d495a39, TypeCJ>, Enc_405228, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001000110;
@@ -6540,7 +6579,7 @@ def J4_cmpeqn1_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_3a2484, PredRel {
+tc_3d495a39, TypeCJ>, Enc_3a2484, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001000110;
@@ -6565,7 +6604,7 @@ def J4_cmpeqn1_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_736575, PredRel {
+tc_3d495a39, TypeCJ>, Enc_736575, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001001110;
@@ -6590,7 +6629,7 @@ def J4_cmpeqn1_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_8e583a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_8e583a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001001110;
@@ -6615,7 +6654,7 @@ def J4_cmpgt_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6641,7 +6680,7 @@ def J4_cmpgt_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6667,7 +6706,7 @@ def J4_cmpgt_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010011;
@@ -6693,7 +6732,7 @@ def J4_cmpgt_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010011;
@@ -6719,7 +6758,7 @@ def J4_cmpgt_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010011;
@@ -6745,7 +6784,7 @@ def J4_cmpgt_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010011;
@@ -6771,7 +6810,7 @@ def J4_cmpgt_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6796,7 +6835,7 @@ def J4_cmpgt_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6821,7 +6860,7 @@ def J4_cmpgt_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010010;
@@ -6846,7 +6885,7 @@ def J4_cmpgt_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010010;
@@ -6871,7 +6910,7 @@ def J4_cmpgt_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010010;
@@ -6896,7 +6935,7 @@ def J4_cmpgt_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010010;
@@ -6921,7 +6960,7 @@ def J4_cmpgti_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6947,7 +6986,7 @@ def J4_cmpgti_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6973,7 +7012,7 @@ def J4_cmpgti_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000011;
@@ -6999,7 +7038,7 @@ def J4_cmpgti_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000011;
@@ -7025,7 +7064,7 @@ def J4_cmpgti_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001011;
@@ -7051,7 +7090,7 @@ def J4_cmpgti_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001011;
@@ -7077,7 +7116,7 @@ def J4_cmpgti_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7102,7 +7141,7 @@ def J4_cmpgti_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7127,7 +7166,7 @@ def J4_cmpgti_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000010;
@@ -7152,7 +7191,7 @@ def J4_cmpgti_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000010;
@@ -7177,7 +7216,7 @@ def J4_cmpgti_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001010;
@@ -7202,7 +7241,7 @@ def J4_cmpgti_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001010;
@@ -7227,7 +7266,7 @@ def J4_cmpgtn1_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_3694bd, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_3694bd, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -7253,7 +7292,7 @@ def J4_cmpgtn1_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_a6853f, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_a6853f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -7279,7 +7318,7 @@ def J4_cmpgtn1_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_a42857, PredRel {
+tc_3d495a39, TypeCJ>, Enc_a42857, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001000111;
@@ -7305,7 +7344,7 @@ def J4_cmpgtn1_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_f6fe0b, PredRel {
+tc_3d495a39, TypeCJ>, Enc_f6fe0b, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001000111;
@@ -7331,7 +7370,7 @@ def J4_cmpgtn1_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_3e3989, PredRel {
+tc_3d495a39, TypeCJ>, Enc_3e3989, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001001111;
@@ -7357,7 +7396,7 @@ def J4_cmpgtn1_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_b909d2, PredRel {
+tc_3d495a39, TypeCJ>, Enc_b909d2, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001001111;
@@ -7383,7 +7422,7 @@ def J4_cmpgtn1_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_f82302, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_f82302, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -7408,7 +7447,7 @@ def J4_cmpgtn1_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_6413b6, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_6413b6, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -7433,7 +7472,7 @@ def J4_cmpgtn1_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_b78edd, PredRel {
+tc_3d495a39, TypeCJ>, Enc_b78edd, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001000110;
@@ -7458,7 +7497,7 @@ def J4_cmpgtn1_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_041d7b, PredRel {
+tc_3d495a39, TypeCJ>, Enc_041d7b, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001000110;
@@ -7483,7 +7522,7 @@ def J4_cmpgtn1_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_b1e1fb, PredRel {
+tc_3d495a39, TypeCJ>, Enc_b1e1fb, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001001110;
@@ -7508,7 +7547,7 @@ def J4_cmpgtn1_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_178717, PredRel {
+tc_3d495a39, TypeCJ>, Enc_178717, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001001110;
@@ -7533,7 +7572,7 @@ def J4_cmpgtu_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7559,7 +7598,7 @@ def J4_cmpgtu_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7585,7 +7624,7 @@ def J4_cmpgtu_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010101;
@@ -7611,7 +7650,7 @@ def J4_cmpgtu_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010101;
@@ -7637,7 +7676,7 @@ def J4_cmpgtu_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010101;
@@ -7663,7 +7702,7 @@ def J4_cmpgtu_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010101;
@@ -7689,7 +7728,7 @@ def J4_cmpgtu_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7714,7 +7753,7 @@ def J4_cmpgtu_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7739,7 +7778,7 @@ def J4_cmpgtu_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010100;
@@ -7764,7 +7803,7 @@ def J4_cmpgtu_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010100;
@@ -7789,7 +7828,7 @@ def J4_cmpgtu_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010100;
@@ -7814,7 +7853,7 @@ def J4_cmpgtu_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010100;
@@ -7839,7 +7878,7 @@ def J4_cmpgtui_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7865,7 +7904,7 @@ def J4_cmpgtui_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7891,7 +7930,7 @@ def J4_cmpgtui_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000101;
@@ -7917,7 +7956,7 @@ def J4_cmpgtui_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000101;
@@ -7943,7 +7982,7 @@ def J4_cmpgtui_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001101;
@@ -7969,7 +8008,7 @@ def J4_cmpgtui_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001101;
@@ -7995,7 +8034,7 @@ def J4_cmpgtui_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8020,7 +8059,7 @@ def J4_cmpgtui_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8045,7 +8084,7 @@ def J4_cmpgtui_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000100;
@@ -8070,7 +8109,7 @@ def J4_cmpgtui_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000100;
@@ -8095,7 +8134,7 @@ def J4_cmpgtui_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001100;
@@ -8120,7 +8159,7 @@ def J4_cmpgtui_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001100;
@@ -8145,7 +8184,7 @@ def J4_cmplt_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8171,7 +8210,7 @@ def J4_cmplt_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8197,7 +8236,7 @@ def J4_cmplt_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8222,7 +8261,7 @@ def J4_cmplt_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8247,7 +8286,7 @@ def J4_cmpltu_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8273,7 +8312,7 @@ def J4_cmpltu_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8299,7 +8338,7 @@ def J4_cmpltu_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8324,7 +8363,7 @@ def J4_cmpltu_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8349,7 +8388,7 @@ def J4_hintjumpr : HInst<
(outs),
(ins IntRegs:$Rs32),
"hintjr($Rs32)",
-tc_9faf76ae, TypeJ>, Enc_ecbcc8 {
+tc_d5b7b0c1, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010010101;
let isTerminator = 1;
@@ -8361,7 +8400,7 @@ def J4_jumpseti : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u6_0Imm:$II, b30_2Imm:$Ii),
"$Rd16 = #$II ; jump $Ii",
-tc_49eb22c8, TypeCJ>, Enc_9e4c3f {
+tc_0663f615, TypeCJ>, Enc_9e4c3f {
let Inst{0-0} = 0b0;
let Inst{31-22} = 0b0001011000;
let hasNewValue = 1;
@@ -8381,7 +8420,7 @@ def J4_jumpsetr : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"$Rd16 = $Rs16 ; jump $Ii",
-tc_49eb22c8, TypeCJ>, Enc_66bce1 {
+tc_0663f615, TypeCJ>, Enc_66bce1 {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001011100;
@@ -8402,7 +8441,7 @@ def J4_tstbit0_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!tstbit($Ns8.new,#0)) jump:nt $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -8427,7 +8466,7 @@ def J4_tstbit0_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!tstbit($Ns8.new,#0)) jump:t $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -8452,7 +8491,7 @@ def J4_tstbit0_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (!p0.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001000111;
@@ -8477,7 +8516,7 @@ def J4_tstbit0_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (!p0.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001000111;
@@ -8502,7 +8541,7 @@ def J4_tstbit0_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (!p1.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001001111;
@@ -8527,7 +8566,7 @@ def J4_tstbit0_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (!p1.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001001111;
@@ -8552,7 +8591,7 @@ def J4_tstbit0_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (tstbit($Ns8.new,#0)) jump:nt $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -8576,7 +8615,7 @@ def J4_tstbit0_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (tstbit($Ns8.new,#0)) jump:t $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -8600,7 +8639,7 @@ def J4_tstbit0_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (p0.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001000110;
@@ -8624,7 +8663,7 @@ def J4_tstbit0_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (p0.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001000110;
@@ -8648,7 +8687,7 @@ def J4_tstbit0_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (p1.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001001110;
@@ -8672,7 +8711,7 @@ def J4_tstbit0_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (p1.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001001110;
@@ -8696,7 +8735,7 @@ def L2_deallocframe : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = deallocframe($Rs32):raw",
-tc_d1090e34, TypeLD>, Enc_3a3d62 {
+tc_15aa71c5, TypeLD>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010000000;
let accessSize = DoubleWordAccess;
@@ -8708,7 +8747,7 @@ def L2_loadalignb_io : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Ryy32 = memb_fifo($Rs32+#$Ii)",
-tc_ef52ed71, TypeLD>, Enc_a27588 {
+tc_5ef37dc4, TypeLD>, Enc_a27588 {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8725,9 +8764,10 @@ def L2_loadalignb_pbr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++$Mu2:brev)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110100;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
@@ -8736,7 +8776,7 @@ def L2_loadalignb_pci : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++#$Ii:circ($Mu2))",
-tc_03220ffa, TypeLD>, Enc_74aef2 {
+tc_785f65a7, TypeLD>, Enc_74aef2 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000100;
let addrMode = PostInc;
@@ -8749,7 +8789,7 @@ def L2_loadalignb_pcr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++I:circ($Mu2))",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000100;
let addrMode = PostInc;
@@ -8762,7 +8802,7 @@ def L2_loadalignb_pi : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Ryy32 = memb_fifo($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_6b197f {
+tc_3c76b0ff, TypeLD>, Enc_6b197f {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010100;
let addrMode = PostInc;
@@ -8774,7 +8814,7 @@ def L2_loadalignb_pr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++$Mu2)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100100;
let addrMode = PostInc;
@@ -8786,7 +8826,7 @@ def L2_loadalignb_zomap : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
"$Ryy32 = memb_fifo($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let Constraints = "$Ryy32 = $Ryy32in";
@@ -8795,7 +8835,7 @@ def L2_loadalignh_io : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s31_1Imm:$Ii),
"$Ryy32 = memh_fifo($Rs32+#$Ii)",
-tc_ef52ed71, TypeLD>, Enc_5cd7e9 {
+tc_5ef37dc4, TypeLD>, Enc_5cd7e9 {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8812,9 +8852,10 @@ def L2_loadalignh_pbr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++$Mu2:brev)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110010;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
@@ -8823,7 +8864,7 @@ def L2_loadalignh_pci : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++#$Ii:circ($Mu2))",
-tc_03220ffa, TypeLD>, Enc_9e2e1c {
+tc_785f65a7, TypeLD>, Enc_9e2e1c {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000010;
let addrMode = PostInc;
@@ -8836,7 +8877,7 @@ def L2_loadalignh_pcr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++I:circ($Mu2))",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000010;
let addrMode = PostInc;
@@ -8849,7 +8890,7 @@ def L2_loadalignh_pi : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Ryy32 = memh_fifo($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_bd1cbc {
+tc_3c76b0ff, TypeLD>, Enc_bd1cbc {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010010;
let addrMode = PostInc;
@@ -8861,7 +8902,7 @@ def L2_loadalignh_pr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++$Mu2)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100010;
let addrMode = PostInc;
@@ -8873,7 +8914,7 @@ def L2_loadalignh_zomap : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
"$Ryy32 = memh_fifo($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let Constraints = "$Ryy32 = $Ryy32in";
@@ -8882,7 +8923,7 @@ def L2_loadbsw2_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = membh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214 {
+tc_17e0d2cd, TypeLD>, Enc_de0214 {
let Inst{24-21} = 0b0001;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -8900,11 +8941,12 @@ def L2_loadbsw2_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110001;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -8913,7 +8955,7 @@ def L2_loadbsw2_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000001;
let hasNewValue = 1;
@@ -8928,7 +8970,7 @@ def L2_loadbsw2_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000001;
let hasNewValue = 1;
@@ -8943,7 +8985,7 @@ def L2_loadbsw2_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = membh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467 {
+tc_44d3da28, TypeLD>, Enc_152467 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010001;
let hasNewValue = 1;
@@ -8957,7 +8999,7 @@ def L2_loadbsw2_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100001;
let hasNewValue = 1;
@@ -8971,7 +9013,7 @@ def L2_loadbsw2_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = membh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -8981,7 +9023,7 @@ def L2_loadbsw4_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rdd32 = membh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_2d7491 {
+tc_17e0d2cd, TypeLD>, Enc_2d7491 {
let Inst{24-21} = 0b0111;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8997,9 +9039,10 @@ def L2_loadbsw4_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110111;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9008,7 +9051,7 @@ def L2_loadbsw4_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_70b24b {
+tc_e93a3d71, TypeLD>, Enc_70b24b {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000111;
let addrMode = PostInc;
@@ -9021,7 +9064,7 @@ def L2_loadbsw4_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000111;
let addrMode = PostInc;
@@ -9034,7 +9077,7 @@ def L2_loadbsw4_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rdd32 = membh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_71f1b4 {
+tc_44d3da28, TypeLD>, Enc_71f1b4 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010111;
let addrMode = PostInc;
@@ -9046,7 +9089,7 @@ def L2_loadbsw4_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100111;
let addrMode = PostInc;
@@ -9058,7 +9101,7 @@ def L2_loadbsw4_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = membh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9066,7 +9109,7 @@ def L2_loadbzw2_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memubh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214 {
+tc_17e0d2cd, TypeLD>, Enc_de0214 {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9084,11 +9127,12 @@ def L2_loadbzw2_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110011;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9097,7 +9141,7 @@ def L2_loadbzw2_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000011;
let hasNewValue = 1;
@@ -9112,7 +9156,7 @@ def L2_loadbzw2_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000011;
let hasNewValue = 1;
@@ -9127,7 +9171,7 @@ def L2_loadbzw2_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memubh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467 {
+tc_44d3da28, TypeLD>, Enc_152467 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010011;
let hasNewValue = 1;
@@ -9141,7 +9185,7 @@ def L2_loadbzw2_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100011;
let hasNewValue = 1;
@@ -9155,7 +9199,7 @@ def L2_loadbzw2_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memubh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9165,7 +9209,7 @@ def L2_loadbzw4_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rdd32 = memubh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_2d7491 {
+tc_17e0d2cd, TypeLD>, Enc_2d7491 {
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -9181,9 +9225,10 @@ def L2_loadbzw4_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110101;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9192,7 +9237,7 @@ def L2_loadbzw4_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_70b24b {
+tc_e93a3d71, TypeLD>, Enc_70b24b {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000101;
let addrMode = PostInc;
@@ -9205,7 +9250,7 @@ def L2_loadbzw4_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000101;
let addrMode = PostInc;
@@ -9218,7 +9263,7 @@ def L2_loadbzw4_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rdd32 = memubh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_71f1b4 {
+tc_44d3da28, TypeLD>, Enc_71f1b4 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010101;
let addrMode = PostInc;
@@ -9230,7 +9275,7 @@ def L2_loadbzw4_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100101;
let addrMode = PostInc;
@@ -9242,7 +9287,7 @@ def L2_loadbzw4_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memubh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9250,7 +9295,7 @@ def L2_loadrb_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = memb($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9271,11 +9316,12 @@ def L2_loadrb_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9284,7 +9330,7 @@ def L2_loadrb_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e0a47a {
+tc_e93a3d71, TypeLD>, Enc_e0a47a {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001000;
let hasNewValue = 1;
@@ -9299,7 +9345,7 @@ def L2_loadrb_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001000;
let hasNewValue = 1;
@@ -9314,7 +9360,7 @@ def L2_loadrb_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Rd32 = memb($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011000;
let hasNewValue = 1;
@@ -9331,7 +9377,7 @@ def L2_loadrb_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101000;
let hasNewValue = 1;
@@ -9345,7 +9391,7 @@ def L2_loadrb_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memb($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9355,7 +9401,7 @@ def L2_loadrbgp : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memb(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9374,7 +9420,7 @@ def L2_loadrd_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s29_3Imm:$Ii),
"$Rdd32 = memd($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_fa3ba4, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_fa3ba4, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -9393,9 +9439,10 @@ def L2_loadrd_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111110;
+let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9404,7 +9451,7 @@ def L2_loadrd_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_b05839 {
+tc_e93a3d71, TypeLD>, Enc_b05839 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001110;
let addrMode = PostInc;
@@ -9417,7 +9464,7 @@ def L2_loadrd_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001110;
let addrMode = PostInc;
@@ -9430,7 +9477,7 @@ def L2_loadrd_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii),
"$Rdd32 = memd($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_5bdd42, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_5bdd42, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011110;
let addrMode = PostInc;
@@ -9445,7 +9492,7 @@ def L2_loadrd_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101110;
let addrMode = PostInc;
@@ -9457,7 +9504,7 @@ def L2_loadrd_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memd($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9465,7 +9512,7 @@ def L2_loadrdgp : HInst<
(outs DoubleRegs:$Rdd32),
(ins u29_3Imm:$Ii),
"$Rdd32 = memd(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_509701, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_509701, AddrModeRel {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b01001;
let accessSize = DoubleWordAccess;
@@ -9482,7 +9529,7 @@ def L2_loadrh_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9503,11 +9550,12 @@ def L2_loadrh_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9516,7 +9564,7 @@ def L2_loadrh_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001010;
let hasNewValue = 1;
@@ -9531,7 +9579,7 @@ def L2_loadrh_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001010;
let hasNewValue = 1;
@@ -9546,7 +9594,7 @@ def L2_loadrh_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011010;
let hasNewValue = 1;
@@ -9563,7 +9611,7 @@ def L2_loadrh_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101010;
let hasNewValue = 1;
@@ -9577,7 +9625,7 @@ def L2_loadrh_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9587,7 +9635,7 @@ def L2_loadrhgp : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memh(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9606,7 +9654,7 @@ def L2_loadri_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rd32 = memw($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_2a3787, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_2a3787, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9627,11 +9675,12 @@ def L2_loadri_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9640,7 +9689,7 @@ def L2_loadri_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_27fd0e {
+tc_e93a3d71, TypeLD>, Enc_27fd0e {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001100;
let hasNewValue = 1;
@@ -9655,7 +9704,7 @@ def L2_loadri_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001100;
let hasNewValue = 1;
@@ -9670,7 +9719,7 @@ def L2_loadri_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rd32 = memw($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_3d920a, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_3d920a, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011100;
let hasNewValue = 1;
@@ -9687,7 +9736,7 @@ def L2_loadri_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101100;
let hasNewValue = 1;
@@ -9701,7 +9750,7 @@ def L2_loadri_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memw($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9711,7 +9760,7 @@ def L2_loadrigp : HInst<
(outs IntRegs:$Rd32),
(ins u30_2Imm:$Ii),
"$Rd32 = memw(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9730,7 +9779,7 @@ def L2_loadrub_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = memub($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9751,11 +9800,12 @@ def L2_loadrub_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9764,7 +9814,7 @@ def L2_loadrub_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e0a47a {
+tc_e93a3d71, TypeLD>, Enc_e0a47a {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001001;
let hasNewValue = 1;
@@ -9779,7 +9829,7 @@ def L2_loadrub_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001001;
let hasNewValue = 1;
@@ -9794,7 +9844,7 @@ def L2_loadrub_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Rd32 = memub($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011001;
let hasNewValue = 1;
@@ -9811,7 +9861,7 @@ def L2_loadrub_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101001;
let hasNewValue = 1;
@@ -9825,7 +9875,7 @@ def L2_loadrub_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memub($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9835,7 +9885,7 @@ def L2_loadrubgp : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memub(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9854,7 +9904,7 @@ def L2_loadruh_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memuh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9875,11 +9925,12 @@ def L2_loadruh_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9888,7 +9939,7 @@ def L2_loadruh_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001011;
let hasNewValue = 1;
@@ -9903,7 +9954,7 @@ def L2_loadruh_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001011;
let hasNewValue = 1;
@@ -9918,7 +9969,7 @@ def L2_loadruh_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memuh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011011;
let hasNewValue = 1;
@@ -9935,7 +9986,7 @@ def L2_loadruh_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101011;
let hasNewValue = 1;
@@ -9949,7 +10000,7 @@ def L2_loadruh_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memuh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9959,7 +10010,7 @@ def L2_loadruhgp : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memuh(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9978,7 +10029,7 @@ def L2_loadw_locked : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memw_locked($Rs32)",
-tc_6aa5711a, TypeLD>, Enc_5e2823 {
+tc_b43e7930, TypeLD>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010010000;
let hasNewValue = 1;
@@ -9991,7 +10042,7 @@ def L2_ploadrbf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101000;
let isPredicated = 1;
@@ -10013,7 +10064,7 @@ def L2_ploadrbf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10030,7 +10081,7 @@ def L2_ploadrbf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memb($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10040,7 +10091,7 @@ def L2_ploadrbfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111000;
let isPredicated = 1;
@@ -10063,7 +10114,7 @@ def L2_ploadrbfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10081,7 +10132,7 @@ def L2_ploadrbfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memb($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10091,7 +10142,7 @@ def L2_ploadrbt_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001000;
let isPredicated = 1;
@@ -10112,7 +10163,7 @@ def L2_ploadrbt_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10128,7 +10179,7 @@ def L2_ploadrbt_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memb($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10138,7 +10189,7 @@ def L2_ploadrbtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011000;
let isPredicated = 1;
@@ -10160,7 +10211,7 @@ def L2_ploadrbtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10177,7 +10228,7 @@ def L2_ploadrbtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memb($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10187,7 +10238,7 @@ def L2_ploadrdf_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101110;
let isPredicated = 1;
@@ -10207,7 +10258,7 @@ def L2_ploadrdf_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_9d1247, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10222,7 +10273,7 @@ def L2_ploadrdf_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rdd32 = memd($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10230,7 +10281,7 @@ def L2_ploadrdfnew_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111110;
let isPredicated = 1;
@@ -10251,7 +10302,7 @@ def L2_ploadrdfnew_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_9d1247, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10267,7 +10318,7 @@ def L2_ploadrdfnew_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rdd32 = memd($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10275,7 +10326,7 @@ def L2_ploadrdt_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if ($Pt4) $Rdd32 = memd($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001110;
let isPredicated = 1;
@@ -10294,7 +10345,7 @@ def L2_ploadrdt_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if ($Pt4) $Rdd32 = memd($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_9d1247, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10308,7 +10359,7 @@ def L2_ploadrdt_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rdd32 = memd($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10316,7 +10367,7 @@ def L2_ploadrdtnew_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011110;
let isPredicated = 1;
@@ -10336,7 +10387,7 @@ def L2_ploadrdtnew_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_9d1247, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10351,7 +10402,7 @@ def L2_ploadrdtnew_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rdd32 = memd($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10359,7 +10410,7 @@ def L2_ploadrhf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101010;
let isPredicated = 1;
@@ -10381,7 +10432,7 @@ def L2_ploadrhf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10398,7 +10449,7 @@ def L2_ploadrhf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10408,7 +10459,7 @@ def L2_ploadrhfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111010;
let isPredicated = 1;
@@ -10431,7 +10482,7 @@ def L2_ploadrhfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10449,7 +10500,7 @@ def L2_ploadrhfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10459,7 +10510,7 @@ def L2_ploadrht_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4) $Rd32 = memh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001010;
let isPredicated = 1;
@@ -10480,7 +10531,7 @@ def L2_ploadrht_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4) $Rd32 = memh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10496,7 +10547,7 @@ def L2_ploadrht_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10506,7 +10557,7 @@ def L2_ploadrhtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011010;
let isPredicated = 1;
@@ -10528,7 +10579,7 @@ def L2_ploadrhtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10545,7 +10596,7 @@ def L2_ploadrhtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10555,7 +10606,7 @@ def L2_ploadrif_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if (!$Pt4) $Rd32 = memw($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101100;
let isPredicated = 1;
@@ -10577,7 +10628,7 @@ def L2_ploadrif_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if (!$Pt4) $Rd32 = memw($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_b97f71, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10594,7 +10645,7 @@ def L2_ploadrif_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memw($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10604,7 +10655,7 @@ def L2_ploadrifnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111100;
let isPredicated = 1;
@@ -10627,7 +10678,7 @@ def L2_ploadrifnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_b97f71, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10645,7 +10696,7 @@ def L2_ploadrifnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memw($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10655,7 +10706,7 @@ def L2_ploadrit_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if ($Pt4) $Rd32 = memw($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001100;
let isPredicated = 1;
@@ -10676,7 +10727,7 @@ def L2_ploadrit_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if ($Pt4) $Rd32 = memw($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_b97f71, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10692,7 +10743,7 @@ def L2_ploadrit_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memw($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10702,7 +10753,7 @@ def L2_ploadritnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011100;
let isPredicated = 1;
@@ -10724,7 +10775,7 @@ def L2_ploadritnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_b97f71, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10741,7 +10792,7 @@ def L2_ploadritnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memw($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10751,7 +10802,7 @@ def L2_ploadrubf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101001;
let isPredicated = 1;
@@ -10773,7 +10824,7 @@ def L2_ploadrubf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10790,7 +10841,7 @@ def L2_ploadrubf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memub($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10800,7 +10851,7 @@ def L2_ploadrubfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111001;
let isPredicated = 1;
@@ -10823,7 +10874,7 @@ def L2_ploadrubfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10841,7 +10892,7 @@ def L2_ploadrubfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memub($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10851,7 +10902,7 @@ def L2_ploadrubt_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001001;
let isPredicated = 1;
@@ -10872,7 +10923,7 @@ def L2_ploadrubt_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10888,7 +10939,7 @@ def L2_ploadrubt_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memub($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10898,7 +10949,7 @@ def L2_ploadrubtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011001;
let isPredicated = 1;
@@ -10920,7 +10971,7 @@ def L2_ploadrubtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10937,7 +10988,7 @@ def L2_ploadrubtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memub($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10947,7 +10998,7 @@ def L2_ploadruhf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101011;
let isPredicated = 1;
@@ -10969,7 +11020,7 @@ def L2_ploadruhf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -10986,7 +11037,7 @@ def L2_ploadruhf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memuh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10996,7 +11047,7 @@ def L2_ploadruhfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111011;
let isPredicated = 1;
@@ -11019,7 +11070,7 @@ def L2_ploadruhfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11037,7 +11088,7 @@ def L2_ploadruhfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memuh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11047,7 +11098,7 @@ def L2_ploadruht_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4) $Rd32 = memuh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001011;
let isPredicated = 1;
@@ -11068,7 +11119,7 @@ def L2_ploadruht_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4) $Rd32 = memuh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11084,7 +11135,7 @@ def L2_ploadruht_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memuh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11094,7 +11145,7 @@ def L2_ploadruhtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011011;
let isPredicated = 1;
@@ -11116,7 +11167,7 @@ def L2_ploadruhtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11133,7 +11184,7 @@ def L2_ploadruhtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memuh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11143,7 +11194,7 @@ def L4_add_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) += $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -11162,7 +11213,7 @@ def L4_add_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) += $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11170,7 +11221,7 @@ def L4_add_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) += $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -11189,7 +11240,7 @@ def L4_add_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) += $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11197,7 +11248,7 @@ def L4_add_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) += $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -11216,7 +11267,7 @@ def L4_add_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) += $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11224,7 +11275,7 @@ def L4_and_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) &= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -11243,7 +11294,7 @@ def L4_and_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) &= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11251,7 +11302,7 @@ def L4_and_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) &= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -11270,7 +11321,7 @@ def L4_and_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) &= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11278,7 +11329,7 @@ def L4_and_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) &= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -11297,7 +11348,7 @@ def L4_and_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) &= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11305,7 +11356,7 @@ def L4_iadd_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) += #$II",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11324,7 +11375,7 @@ def L4_iadd_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) += #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11332,7 +11383,7 @@ def L4_iadd_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) += #$II",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11351,7 +11402,7 @@ def L4_iadd_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) += #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11359,7 +11410,7 @@ def L4_iadd_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) += #$II",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11378,7 +11429,7 @@ def L4_iadd_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) += #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11386,7 +11437,7 @@ def L4_iand_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) = clrbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11405,7 +11456,7 @@ def L4_iand_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) = clrbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11413,7 +11464,7 @@ def L4_iand_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) = clrbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11432,7 +11483,7 @@ def L4_iand_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) = clrbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11440,7 +11491,7 @@ def L4_iand_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) = clrbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11459,7 +11510,7 @@ def L4_iand_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) = clrbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11467,7 +11518,7 @@ def L4_ior_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) = setbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11486,7 +11537,7 @@ def L4_ior_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) = setbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11494,7 +11545,7 @@ def L4_ior_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) = setbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11513,7 +11564,7 @@ def L4_ior_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) = setbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11521,7 +11572,7 @@ def L4_ior_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) = setbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11540,7 +11591,7 @@ def L4_ior_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) = setbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11548,7 +11599,7 @@ def L4_isub_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) -= #$II",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11567,7 +11618,7 @@ def L4_isub_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) -= #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11575,7 +11626,7 @@ def L4_isub_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) -= #$II",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11594,7 +11645,7 @@ def L4_isub_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) -= #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11602,7 +11653,7 @@ def L4_isub_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) -= #$II",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11621,7 +11672,7 @@ def L4_isub_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) -= #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11629,7 +11680,7 @@ def L4_loadalignb_ap : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
"$Ryy32 = memb_fifo($Re32=#$II)",
-tc_5acef64a, TypeLD>, Enc_f394d3 {
+tc_7a91e76a, TypeLD>, Enc_f394d3 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010100;
@@ -11649,7 +11700,7 @@ def L4_loadalignb_ur : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Ryy32 = memb_fifo($Rt32<<#$Ii+#$II)",
-tc_0cd51c76, TypeLD>, Enc_04c959 {
+tc_a5d4aeec, TypeLD>, Enc_04c959 {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100100;
let addrMode = BaseLongOffset;
@@ -11669,7 +11720,7 @@ def L4_loadalignh_ap : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
"$Ryy32 = memh_fifo($Re32=#$II)",
-tc_5acef64a, TypeLD>, Enc_f394d3 {
+tc_7a91e76a, TypeLD>, Enc_f394d3 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010010;
@@ -11689,7 +11740,7 @@ def L4_loadalignh_ur : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Ryy32 = memh_fifo($Rt32<<#$Ii+#$II)",
-tc_0cd51c76, TypeLD>, Enc_04c959 {
+tc_a5d4aeec, TypeLD>, Enc_04c959 {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100010;
let addrMode = BaseLongOffset;
@@ -11709,7 +11760,7 @@ def L4_loadbsw2_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = membh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010001;
@@ -11730,7 +11781,7 @@ def L4_loadbsw2_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = membh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b {
+tc_bab0eed9, TypeLD>, Enc_4f677b {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100001;
let hasNewValue = 1;
@@ -11751,7 +11802,7 @@ def L4_loadbsw4_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = membh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_7fa7f6 {
+tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010111;
@@ -11770,7 +11821,7 @@ def L4_loadbsw4_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = membh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_6185fe {
+tc_bab0eed9, TypeLD>, Enc_6185fe {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100111;
let addrMode = BaseLongOffset;
@@ -11789,7 +11840,7 @@ def L4_loadbzw2_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memubh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010011;
@@ -11810,7 +11861,7 @@ def L4_loadbzw2_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memubh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b {
+tc_bab0eed9, TypeLD>, Enc_4f677b {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100011;
let hasNewValue = 1;
@@ -11831,7 +11882,7 @@ def L4_loadbzw4_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = memubh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_7fa7f6 {
+tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010101;
@@ -11850,7 +11901,7 @@ def L4_loadbzw4_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = memubh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_6185fe {
+tc_bab0eed9, TypeLD>, Enc_6185fe {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100101;
let addrMode = BaseLongOffset;
@@ -11869,7 +11920,7 @@ def L4_loadd_locked : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memd_locked($Rs32)",
-tc_6aa5711a, TypeLD>, Enc_3a3d62 {
+tc_b43e7930, TypeLD>, Enc_3a3d62 {
let Inst{13-5} = 0b010000000;
let Inst{31-21} = 0b10010010000;
let accessSize = DoubleWordAccess;
@@ -11880,7 +11931,7 @@ def L4_loadrb_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memb($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011000;
@@ -11901,7 +11952,7 @@ def L4_loadrb_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010000;
let hasNewValue = 1;
@@ -11918,7 +11969,7 @@ def L4_loadrb_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memb($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101000;
let hasNewValue = 1;
@@ -11940,7 +11991,7 @@ def L4_loadrd_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = memd($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_7fa7f6 {
+tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011110;
@@ -11959,7 +12010,7 @@ def L4_loadrd_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010110;
let addrMode = BaseRegOffset;
@@ -11974,7 +12025,7 @@ def L4_loadrd_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = memd($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101110;
let addrMode = BaseLongOffset;
@@ -11994,7 +12045,7 @@ def L4_loadrh_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011010;
@@ -12015,7 +12066,7 @@ def L4_loadrh_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010010;
let hasNewValue = 1;
@@ -12032,7 +12083,7 @@ def L4_loadrh_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101010;
let hasNewValue = 1;
@@ -12054,7 +12105,7 @@ def L4_loadri_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memw($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011100;
@@ -12075,7 +12126,7 @@ def L4_loadri_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010100;
let hasNewValue = 1;
@@ -12092,7 +12143,7 @@ def L4_loadri_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memw($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101100;
let hasNewValue = 1;
@@ -12114,7 +12165,7 @@ def L4_loadrub_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memub($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011001;
@@ -12135,7 +12186,7 @@ def L4_loadrub_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010001;
let hasNewValue = 1;
@@ -12152,7 +12203,7 @@ def L4_loadrub_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memub($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101001;
let hasNewValue = 1;
@@ -12174,7 +12225,7 @@ def L4_loadruh_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memuh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011011;
@@ -12195,7 +12246,7 @@ def L4_loadruh_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010011;
let hasNewValue = 1;
@@ -12212,7 +12263,7 @@ def L4_loadruh_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memuh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101011;
let hasNewValue = 1;
@@ -12234,7 +12285,7 @@ def L4_or_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) |= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -12253,7 +12304,7 @@ def L4_or_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) |= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12261,7 +12312,7 @@ def L4_or_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) |= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -12280,7 +12331,7 @@ def L4_or_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) |= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12288,7 +12339,7 @@ def L4_or_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) |= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -12307,7 +12358,7 @@ def L4_or_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) |= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12315,7 +12366,7 @@ def L4_ploadrbf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111000;
@@ -12340,7 +12391,7 @@ def L4_ploadrbf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12357,7 +12408,7 @@ def L4_ploadrbfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111000;
@@ -12383,7 +12434,7 @@ def L4_ploadrbfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12401,7 +12452,7 @@ def L4_ploadrbt_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111000;
@@ -12425,7 +12476,7 @@ def L4_ploadrbt_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000000;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12441,7 +12492,7 @@ def L4_ploadrbtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111000;
@@ -12466,7 +12517,7 @@ def L4_ploadrbtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010000;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12483,7 +12534,7 @@ def L4_ploadrdf_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111110;
@@ -12506,7 +12557,7 @@ def L4_ploadrdf_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110001110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12521,7 +12572,7 @@ def L4_ploadrdfnew_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111110;
@@ -12545,7 +12596,7 @@ def L4_ploadrdfnew_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110011110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12561,7 +12612,7 @@ def L4_ploadrdt_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rdd32 = memd(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111110;
@@ -12583,7 +12634,7 @@ def L4_ploadrdt_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110000110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -12597,7 +12648,7 @@ def L4_ploadrdtnew_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111110;
@@ -12620,7 +12671,7 @@ def L4_ploadrdtnew_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110010110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -12635,7 +12686,7 @@ def L4_ploadrhf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111010;
@@ -12660,7 +12711,7 @@ def L4_ploadrhf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12677,7 +12728,7 @@ def L4_ploadrhfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111010;
@@ -12703,7 +12754,7 @@ def L4_ploadrhfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12721,7 +12772,7 @@ def L4_ploadrht_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111010;
@@ -12745,7 +12796,7 @@ def L4_ploadrht_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000010;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12761,7 +12812,7 @@ def L4_ploadrhtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111010;
@@ -12786,7 +12837,7 @@ def L4_ploadrhtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010010;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12803,7 +12854,7 @@ def L4_ploadrif_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memw(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111100;
@@ -12828,7 +12879,7 @@ def L4_ploadrif_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12845,7 +12896,7 @@ def L4_ploadrifnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111100;
@@ -12871,7 +12922,7 @@ def L4_ploadrifnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12889,7 +12940,7 @@ def L4_ploadrit_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memw(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111100;
@@ -12913,7 +12964,7 @@ def L4_ploadrit_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12929,7 +12980,7 @@ def L4_ploadritnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111100;
@@ -12954,7 +13005,7 @@ def L4_ploadritnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12971,7 +13022,7 @@ def L4_ploadrubf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111001;
@@ -12996,7 +13047,7 @@ def L4_ploadrubf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001001;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13013,7 +13064,7 @@ def L4_ploadrubfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111001;
@@ -13039,7 +13090,7 @@ def L4_ploadrubfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011001;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13057,7 +13108,7 @@ def L4_ploadrubt_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111001;
@@ -13081,7 +13132,7 @@ def L4_ploadrubt_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000001;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13097,7 +13148,7 @@ def L4_ploadrubtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111001;
@@ -13122,7 +13173,7 @@ def L4_ploadrubtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010001;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13139,7 +13190,7 @@ def L4_ploadruhf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111011;
@@ -13164,7 +13215,7 @@ def L4_ploadruhf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13181,7 +13232,7 @@ def L4_ploadruhfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111011;
@@ -13207,7 +13258,7 @@ def L4_ploadruhfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13225,7 +13276,7 @@ def L4_ploadruht_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memuh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111011;
@@ -13249,7 +13300,7 @@ def L4_ploadruht_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000011;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13265,7 +13316,7 @@ def L4_ploadruhtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111011;
@@ -13290,7 +13341,7 @@ def L4_ploadruhtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010011;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13307,7 +13358,7 @@ def L4_return : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = dealloc_return($Rs32):raw",
-tc_3d04548d, TypeLD>, Enc_3a3d62, PredNewRel {
+tc_675e4897, TypeLD>, Enc_3a3d62, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010110000;
let isTerminator = 1;
@@ -13328,7 +13379,7 @@ def L4_return_f : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4) $Rdd32 = dealloc_return($Rs32):raw",
-tc_513bef45, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_2b8da4c2, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1100;
let Inst{31-21} = 0b10010110000;
@@ -13350,7 +13401,7 @@ def L4_return_fnew_pnt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4.new) $Rdd32 = dealloc_return($Rs32):nt:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b10010110000;
@@ -13373,7 +13424,7 @@ def L4_return_fnew_pt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4.new) $Rdd32 = dealloc_return($Rs32):t:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1110;
let Inst{31-21} = 0b10010110000;
@@ -13396,7 +13447,7 @@ def L4_return_map_to_raw_f : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4) dealloc_return",
-tc_513bef45, TypeMAPPING>, Requires<[HasV65]> {
+tc_2b8da4c2, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13404,7 +13455,7 @@ def L4_return_map_to_raw_fnew_pnt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4.new) dealloc_return:nt",
-tc_395dc00f, TypeMAPPING>, Requires<[HasV65]> {
+tc_9da59d12, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13412,7 +13463,7 @@ def L4_return_map_to_raw_fnew_pt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4.new) dealloc_return:t",
-tc_395dc00f, TypeMAPPING>, Requires<[HasV65]> {
+tc_9da59d12, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13420,7 +13471,7 @@ def L4_return_map_to_raw_t : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4) dealloc_return",
-tc_3bc2c5d3, TypeMAPPING>, Requires<[HasV65]> {
+tc_4d5fa3a1, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13428,7 +13479,7 @@ def L4_return_map_to_raw_tnew_pnt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4.new) dealloc_return:nt",
-tc_e7624c08, TypeMAPPING>, Requires<[HasV65]> {
+tc_e06f432a, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13436,7 +13487,7 @@ def L4_return_map_to_raw_tnew_pt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4.new) dealloc_return:t",
-tc_e7624c08, TypeMAPPING>, Requires<[HasV65]> {
+tc_e06f432a, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13444,7 +13495,7 @@ def L4_return_t : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4) $Rdd32 = dealloc_return($Rs32):raw",
-tc_513bef45, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_2b8da4c2, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b10010110000;
@@ -13465,7 +13516,7 @@ def L4_return_tnew_pnt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4.new) $Rdd32 = dealloc_return($Rs32):nt:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b10010110000;
@@ -13487,7 +13538,7 @@ def L4_return_tnew_pt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4.new) $Rdd32 = dealloc_return($Rs32):t:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b10010110000;
@@ -13509,7 +13560,7 @@ def L4_sub_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) -= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -13528,7 +13579,7 @@ def L4_sub_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) -= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13536,7 +13587,7 @@ def L4_sub_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) -= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -13555,7 +13606,7 @@ def L4_sub_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) -= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13563,7 +13614,7 @@ def L4_sub_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) -= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -13582,7 +13633,7 @@ def L4_sub_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) -= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13590,15 +13641,26 @@ def L6_deallocframe_map_to_raw : HInst<
(outs),
(ins),
"deallocframe",
-tc_d1090e34, TypeMAPPING>, Requires<[HasV65]> {
+tc_15aa71c5, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
+def L6_memcpy : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, ModRegs:$Mu2),
+"memcpy($Rs32,$Rt32,$Mu2)",
+tc_a6b1eca9, TypeLD>, Enc_a75aa6, Requires<[HasV66]> {
+let Inst{7-0} = 0b01000000;
+let Inst{31-21} = 0b10010010000;
+let mayLoad = 1;
+let isSolo = 1;
+let mayStore = 1;
+}
def L6_return_map_to_raw : HInst<
(outs),
(ins),
"dealloc_return",
-tc_3d04548d, TypeMAPPING>, Requires<[HasV65]> {
+tc_675e4897, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13606,7 +13668,7 @@ def M2_acci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += add($Rs32,$Rt32)",
-tc_c74f796f, TypeM>, Enc_2ae154, ImmRegRel {
+tc_f675fee8, TypeM>, Enc_2ae154, ImmRegRel {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -13621,7 +13683,7 @@ def M2_accii : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 += add($Rs32,#$Ii)",
-tc_c74f796f, TypeM>, Enc_c90aca, ImmRegRel {
+tc_f675fee8, TypeM>, Enc_c90aca, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100010000;
let hasNewValue = 1;
@@ -13640,7 +13702,7 @@ def M2_cmaci_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpyi($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13651,7 +13713,7 @@ def M2_cmacr_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpyr($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13662,7 +13724,7 @@ def M2_cmacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13674,7 +13736,7 @@ def M2_cmacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -13686,7 +13748,7 @@ def M2_cmacsc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32*):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13698,7 +13760,7 @@ def M2_cmacsc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32*):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -13710,7 +13772,7 @@ def M2_cmpyi_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpyi($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13720,7 +13782,7 @@ def M2_cmpyr_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpyr($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13730,7 +13792,7 @@ def M2_cmpyrs_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -13743,7 +13805,7 @@ def M2_cmpyrs_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -13756,7 +13818,7 @@ def M2_cmpyrsc_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32*):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101011;
@@ -13769,7 +13831,7 @@ def M2_cmpyrsc_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -13782,7 +13844,7 @@ def M2_cmpys_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13793,7 +13855,7 @@ def M2_cmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -13804,7 +13866,7 @@ def M2_cmpysc_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32*):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -13815,7 +13877,7 @@ def M2_cmpysc_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32*):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101110;
@@ -13826,7 +13888,7 @@ def M2_cnacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13838,7 +13900,7 @@ def M2_cnacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -13850,7 +13912,7 @@ def M2_cnacsc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32*):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13862,7 +13924,7 @@ def M2_cnacsc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32*):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -13874,7 +13936,7 @@ def M2_dpmpyss_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13885,7 +13947,7 @@ def M2_dpmpyss_nac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -13896,7 +13958,7 @@ def M2_dpmpyss_rnd_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -13908,7 +13970,7 @@ def M2_dpmpyss_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13918,7 +13980,7 @@ def M2_dpmpyuu_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13929,7 +13991,7 @@ def M2_dpmpyuu_nac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111011;
@@ -13940,7 +14002,7 @@ def M2_dpmpyuu_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -13950,7 +14012,7 @@ def M2_hmmpyh_rs1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.h):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -13963,7 +14025,7 @@ def M2_hmmpyh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.h):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -13976,7 +14038,7 @@ def M2_hmmpyl_rs1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.l):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -13989,7 +14051,7 @@ def M2_hmmpyl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.l):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -14002,7 +14064,7 @@ def M2_maci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyi($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_2ae154, ImmRegRel {
+tc_d773585a, TypeM>, Enc_2ae154, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -14017,7 +14079,7 @@ def M2_macsin : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rx32 -= mpyi($Rs32,#$Ii)",
-tc_16d0d8d5, TypeM>, Enc_c90aca {
+tc_05d3a09b, TypeM>, Enc_c90aca {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100001100;
let hasNewValue = 1;
@@ -14035,7 +14097,7 @@ def M2_macsip : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rx32 += mpyi($Rs32,#$Ii)",
-tc_16d0d8d5, TypeM>, Enc_c90aca, ImmRegRel {
+tc_05d3a09b, TypeM>, Enc_c90aca, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100001000;
let hasNewValue = 1;
@@ -14054,7 +14116,7 @@ def M2_mmachs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -14066,7 +14128,7 @@ def M2_mmachs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -14078,7 +14140,7 @@ def M2_mmachs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -14090,7 +14152,7 @@ def M2_mmachs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -14102,7 +14164,7 @@ def M2_mmacls_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -14114,7 +14176,7 @@ def M2_mmacls_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -14126,7 +14188,7 @@ def M2_mmacls_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -14138,7 +14200,7 @@ def M2_mmacls_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -14150,7 +14212,7 @@ def M2_mmacuhs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -14162,7 +14224,7 @@ def M2_mmacuhs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -14174,7 +14236,7 @@ def M2_mmacuhs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -14186,7 +14248,7 @@ def M2_mmacuhs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -14198,7 +14260,7 @@ def M2_mmaculs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -14210,7 +14272,7 @@ def M2_mmaculs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -14222,7 +14284,7 @@ def M2_mmaculs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -14234,7 +14296,7 @@ def M2_mmaculs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -14246,7 +14308,7 @@ def M2_mmpyh_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -14257,7 +14319,7 @@ def M2_mmpyh_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -14268,7 +14330,7 @@ def M2_mmpyh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -14279,7 +14341,7 @@ def M2_mmpyh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -14290,7 +14352,7 @@ def M2_mmpyl_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -14301,7 +14363,7 @@ def M2_mmpyl_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -14312,7 +14374,7 @@ def M2_mmpyl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -14323,7 +14385,7 @@ def M2_mmpyl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -14334,7 +14396,7 @@ def M2_mmpyuh_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -14345,7 +14407,7 @@ def M2_mmpyuh_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -14356,7 +14418,7 @@ def M2_mmpyuh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -14367,7 +14429,7 @@ def M2_mmpyuh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -14378,7 +14440,7 @@ def M2_mmpyul_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -14389,7 +14451,7 @@ def M2_mmpyul_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -14400,7 +14462,7 @@ def M2_mmpyul_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -14411,18 +14473,31 @@ def M2_mmpyul_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
let prefersSlot3 = 1;
let Defs = [USR_OVF];
}
+def M2_mnaci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyi($Rs32,$Rt32)",
+tc_bdceeac1, TypeM>, Enc_2ae154, Requires<[HasV66]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
def M2_mpy_acc_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14435,7 +14510,7 @@ def M2_mpy_acc_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14448,7 +14523,7 @@ def M2_mpy_acc_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14461,7 +14536,7 @@ def M2_mpy_acc_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14474,7 +14549,7 @@ def M2_mpy_acc_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14487,7 +14562,7 @@ def M2_mpy_acc_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14500,7 +14575,7 @@ def M2_mpy_acc_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14513,7 +14588,7 @@ def M2_mpy_acc_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14526,7 +14601,7 @@ def M2_mpy_acc_sat_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14540,7 +14615,7 @@ def M2_mpy_acc_sat_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14554,7 +14629,7 @@ def M2_mpy_acc_sat_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14568,7 +14643,7 @@ def M2_mpy_acc_sat_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14582,7 +14657,7 @@ def M2_mpy_acc_sat_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14596,7 +14671,7 @@ def M2_mpy_acc_sat_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14610,7 +14685,7 @@ def M2_mpy_acc_sat_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14624,7 +14699,7 @@ def M2_mpy_acc_sat_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14638,7 +14713,7 @@ def M2_mpy_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14650,7 +14725,7 @@ def M2_mpy_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14662,7 +14737,7 @@ def M2_mpy_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14674,7 +14749,7 @@ def M2_mpy_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14686,7 +14761,7 @@ def M2_mpy_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14698,7 +14773,7 @@ def M2_mpy_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14710,7 +14785,7 @@ def M2_mpy_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14722,7 +14797,7 @@ def M2_mpy_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14734,7 +14809,7 @@ def M2_mpy_nac_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14747,7 +14822,7 @@ def M2_mpy_nac_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14760,7 +14835,7 @@ def M2_mpy_nac_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14773,7 +14848,7 @@ def M2_mpy_nac_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14786,7 +14861,7 @@ def M2_mpy_nac_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14799,7 +14874,7 @@ def M2_mpy_nac_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14812,7 +14887,7 @@ def M2_mpy_nac_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14825,7 +14900,7 @@ def M2_mpy_nac_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14838,7 +14913,7 @@ def M2_mpy_nac_sat_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14852,7 +14927,7 @@ def M2_mpy_nac_sat_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14866,7 +14941,7 @@ def M2_mpy_nac_sat_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14880,7 +14955,7 @@ def M2_mpy_nac_sat_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14894,7 +14969,7 @@ def M2_mpy_nac_sat_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14908,7 +14983,7 @@ def M2_mpy_nac_sat_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14922,7 +14997,7 @@ def M2_mpy_nac_sat_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14936,7 +15011,7 @@ def M2_mpy_nac_sat_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14950,7 +15025,7 @@ def M2_mpy_rnd_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -14962,7 +15037,7 @@ def M2_mpy_rnd_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -14974,7 +15049,7 @@ def M2_mpy_rnd_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -14986,7 +15061,7 @@ def M2_mpy_rnd_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -14998,7 +15073,7 @@ def M2_mpy_rnd_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15010,7 +15085,7 @@ def M2_mpy_rnd_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15022,7 +15097,7 @@ def M2_mpy_rnd_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15034,7 +15109,7 @@ def M2_mpy_rnd_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15046,7 +15121,7 @@ def M2_mpy_sat_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15059,7 +15134,7 @@ def M2_mpy_sat_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15072,7 +15147,7 @@ def M2_mpy_sat_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15085,7 +15160,7 @@ def M2_mpy_sat_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15098,7 +15173,7 @@ def M2_mpy_sat_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15111,7 +15186,7 @@ def M2_mpy_sat_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15124,7 +15199,7 @@ def M2_mpy_sat_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15137,7 +15212,7 @@ def M2_mpy_sat_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15150,7 +15225,7 @@ def M2_mpy_sat_rnd_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15163,7 +15238,7 @@ def M2_mpy_sat_rnd_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15176,7 +15251,7 @@ def M2_mpy_sat_rnd_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15189,7 +15264,7 @@ def M2_mpy_sat_rnd_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15202,7 +15277,7 @@ def M2_mpy_sat_rnd_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15215,7 +15290,7 @@ def M2_mpy_sat_rnd_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15228,7 +15303,7 @@ def M2_mpy_sat_rnd_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15241,7 +15316,7 @@ def M2_mpy_sat_rnd_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15254,7 +15329,7 @@ def M2_mpy_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101000;
@@ -15266,7 +15341,7 @@ def M2_mpy_up_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -15278,7 +15353,7 @@ def M2_mpy_up_s1_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -15291,7 +15366,7 @@ def M2_mpyd_acc_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15302,7 +15377,7 @@ def M2_mpyd_acc_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15313,7 +15388,7 @@ def M2_mpyd_acc_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15324,7 +15399,7 @@ def M2_mpyd_acc_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15335,7 +15410,7 @@ def M2_mpyd_acc_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15346,7 +15421,7 @@ def M2_mpyd_acc_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15357,7 +15432,7 @@ def M2_mpyd_acc_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15368,7 +15443,7 @@ def M2_mpyd_acc_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15379,7 +15454,7 @@ def M2_mpyd_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15389,7 +15464,7 @@ def M2_mpyd_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15399,7 +15474,7 @@ def M2_mpyd_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15409,7 +15484,7 @@ def M2_mpyd_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15419,7 +15494,7 @@ def M2_mpyd_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15429,7 +15504,7 @@ def M2_mpyd_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15439,7 +15514,7 @@ def M2_mpyd_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15449,7 +15524,7 @@ def M2_mpyd_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15459,7 +15534,7 @@ def M2_mpyd_nac_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15470,7 +15545,7 @@ def M2_mpyd_nac_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15481,7 +15556,7 @@ def M2_mpyd_nac_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15492,7 +15567,7 @@ def M2_mpyd_nac_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15503,7 +15578,7 @@ def M2_mpyd_nac_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15514,7 +15589,7 @@ def M2_mpyd_nac_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15525,7 +15600,7 @@ def M2_mpyd_nac_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15536,7 +15611,7 @@ def M2_mpyd_nac_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15547,7 +15622,7 @@ def M2_mpyd_rnd_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15557,7 +15632,7 @@ def M2_mpyd_rnd_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15567,7 +15642,7 @@ def M2_mpyd_rnd_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15577,7 +15652,7 @@ def M2_mpyd_rnd_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15587,7 +15662,7 @@ def M2_mpyd_rnd_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15597,7 +15672,7 @@ def M2_mpyd_rnd_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15607,7 +15682,7 @@ def M2_mpyd_rnd_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15617,7 +15692,7 @@ def M2_mpyd_rnd_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15627,7 +15702,7 @@ def M2_mpyi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyi($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be, ImmRegRel {
+tc_bafaade3, TypeM>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101000;
@@ -15641,7 +15716,7 @@ def M2_mpysin : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Rd32 = -mpyi($Rs32,#$Ii)",
-tc_1853ea6d, TypeM>, Enc_b8c967 {
+tc_c8ce0b5c, TypeM>, Enc_b8c967 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100000100;
let hasNewValue = 1;
@@ -15652,7 +15727,7 @@ def M2_mpysip : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rd32 = +mpyi($Rs32,#$Ii)",
-tc_1853ea6d, TypeM>, Enc_b8c967 {
+tc_c8ce0b5c, TypeM>, Enc_b8c967 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100000000;
let hasNewValue = 1;
@@ -15668,7 +15743,7 @@ def M2_mpysmi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, m32_0Imm:$Ii),
"$Rd32 = mpyi($Rs32,#$Ii)",
-tc_1853ea6d, TypeM>, ImmRegRel {
+tc_c8ce0b5c, TypeM>, ImmRegRel {
let hasNewValue = 1;
let opNewValue = 0;
let CextOpcode = "M2_mpyi";
@@ -15684,7 +15759,7 @@ def M2_mpysu_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpysu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101011;
@@ -15696,7 +15771,7 @@ def M2_mpyu_acc_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15709,7 +15784,7 @@ def M2_mpyu_acc_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15722,7 +15797,7 @@ def M2_mpyu_acc_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15735,7 +15810,7 @@ def M2_mpyu_acc_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15748,7 +15823,7 @@ def M2_mpyu_acc_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15761,7 +15836,7 @@ def M2_mpyu_acc_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15774,7 +15849,7 @@ def M2_mpyu_acc_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15787,7 +15862,7 @@ def M2_mpyu_acc_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15800,7 +15875,7 @@ def M2_mpyu_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15812,7 +15887,7 @@ def M2_mpyu_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15824,7 +15899,7 @@ def M2_mpyu_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15836,7 +15911,7 @@ def M2_mpyu_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15848,7 +15923,7 @@ def M2_mpyu_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15860,7 +15935,7 @@ def M2_mpyu_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15872,7 +15947,7 @@ def M2_mpyu_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15884,7 +15959,7 @@ def M2_mpyu_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15896,7 +15971,7 @@ def M2_mpyu_nac_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15909,7 +15984,7 @@ def M2_mpyu_nac_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -15922,7 +15997,7 @@ def M2_mpyu_nac_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15935,7 +16010,7 @@ def M2_mpyu_nac_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -15948,7 +16023,7 @@ def M2_mpyu_nac_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15961,7 +16036,7 @@ def M2_mpyu_nac_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -15974,7 +16049,7 @@ def M2_mpyu_nac_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15987,7 +16062,7 @@ def M2_mpyu_nac_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -16000,7 +16075,7 @@ def M2_mpyu_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101010;
@@ -16012,7 +16087,7 @@ def M2_mpyud_acc_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16023,7 +16098,7 @@ def M2_mpyud_acc_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16034,7 +16109,7 @@ def M2_mpyud_acc_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16045,7 +16120,7 @@ def M2_mpyud_acc_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16056,7 +16131,7 @@ def M2_mpyud_acc_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16067,7 +16142,7 @@ def M2_mpyud_acc_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16078,7 +16153,7 @@ def M2_mpyud_acc_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16089,7 +16164,7 @@ def M2_mpyud_acc_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16100,7 +16175,7 @@ def M2_mpyud_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16110,7 +16185,7 @@ def M2_mpyud_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16120,7 +16195,7 @@ def M2_mpyud_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16130,7 +16205,7 @@ def M2_mpyud_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16140,7 +16215,7 @@ def M2_mpyud_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16150,7 +16225,7 @@ def M2_mpyud_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16160,7 +16235,7 @@ def M2_mpyud_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16170,7 +16245,7 @@ def M2_mpyud_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16180,7 +16255,7 @@ def M2_mpyud_nac_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16191,7 +16266,7 @@ def M2_mpyud_nac_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16202,7 +16277,7 @@ def M2_mpyud_nac_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16213,7 +16288,7 @@ def M2_mpyud_nac_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16224,7 +16299,7 @@ def M2_mpyud_nac_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16235,7 +16310,7 @@ def M2_mpyud_nac_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16246,7 +16321,7 @@ def M2_mpyud_nac_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16257,7 +16332,7 @@ def M2_mpyud_nac_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16268,7 +16343,7 @@ def M2_mpyui : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyui($Rs32,$Rt32)",
-tc_8fd5f294, TypeM> {
+tc_bafaade3, TypeM> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -16278,7 +16353,7 @@ def M2_nacci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= add($Rs32,$Rt32)",
-tc_c74f796f, TypeM>, Enc_2ae154 {
+tc_f675fee8, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111100;
@@ -16292,7 +16367,7 @@ def M2_naccii : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 -= add($Rs32,#$Ii)",
-tc_c74f796f, TypeM>, Enc_c90aca {
+tc_f675fee8, TypeM>, Enc_c90aca {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100010100;
let hasNewValue = 1;
@@ -16310,7 +16385,7 @@ def M2_subacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rt32, IntRegs:$Rs32),
"$Rx32 += sub($Rt32,$Rs32)",
-tc_c74f796f, TypeM>, Enc_a568d4 {
+tc_f675fee8, TypeM>, Enc_a568d4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -16324,7 +16399,7 @@ def M2_vabsdiffh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffh($Rtt32,$Rss32)",
-tc_2b6f77c6, TypeM>, Enc_ea23e4 {
+tc_002cb246, TypeM>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -16334,7 +16409,7 @@ def M2_vabsdiffw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffw($Rtt32,$Rss32)",
-tc_2b6f77c6, TypeM>, Enc_ea23e4 {
+tc_002cb246, TypeM>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -16344,7 +16419,7 @@ def M2_vcmac_s0_sat_i : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vcmpyi($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -16356,7 +16431,7 @@ def M2_vcmac_s0_sat_r : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vcmpyr($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -16368,7 +16443,7 @@ def M2_vcmpy_s0_sat_i : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyi($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -16379,7 +16454,7 @@ def M2_vcmpy_s0_sat_r : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyr($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -16390,7 +16465,7 @@ def M2_vcmpy_s1_sat_i : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyi($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -16401,7 +16476,7 @@ def M2_vcmpy_s1_sat_r : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyr($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -16412,7 +16487,7 @@ def M2_vdmacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpy($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16424,7 +16499,7 @@ def M2_vdmacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpy($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -16436,7 +16511,7 @@ def M2_vdmpyrs_s0 : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vdmpy($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001000;
@@ -16449,7 +16524,7 @@ def M2_vdmpyrs_s1 : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vdmpy($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001100;
@@ -16462,7 +16537,7 @@ def M2_vdmpys_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpy($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16473,7 +16548,7 @@ def M2_vdmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpy($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -16484,7 +16559,7 @@ def M2_vmac2 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -16495,7 +16570,7 @@ def M2_vmac2es : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -16506,7 +16581,7 @@ def M2_vmac2es_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16518,7 +16593,7 @@ def M2_vmac2es_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -16530,7 +16605,7 @@ def M2_vmac2s_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -16542,7 +16617,7 @@ def M2_vmac2s_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -16554,7 +16629,7 @@ def M2_vmac2su_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyhsu($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111011;
@@ -16566,7 +16641,7 @@ def M2_vmac2su_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyhsu($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111111;
@@ -16578,7 +16653,7 @@ def M2_vmpy2es_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyeh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16589,7 +16664,7 @@ def M2_vmpy2es_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyeh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -16600,7 +16675,7 @@ def M2_vmpy2s_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyh($Rs32,$Rt32):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -16611,7 +16686,7 @@ def M2_vmpy2s_s0pack : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vmpyh($Rs32,$Rt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -16624,7 +16699,7 @@ def M2_vmpy2s_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyh($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -16635,7 +16710,7 @@ def M2_vmpy2s_s1pack : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vmpyh($Rs32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -16648,7 +16723,7 @@ def M2_vmpy2su_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyhsu($Rs32,$Rt32):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -16659,7 +16734,7 @@ def M2_vmpy2su_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyhsu($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -16670,7 +16745,7 @@ def M2_vraddh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vraddh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001001;
@@ -16682,7 +16757,7 @@ def M2_vradduh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vradduh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001000;
@@ -16694,7 +16769,7 @@ def M2_vrcmaci_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyi($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16705,7 +16780,7 @@ def M2_vrcmaci_s0c : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyi($Rss32,$Rtt32*)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -16716,7 +16791,7 @@ def M2_vrcmacr_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyr($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16727,7 +16802,7 @@ def M2_vrcmacr_s0c : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyr($Rss32,$Rtt32*)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -16738,7 +16813,7 @@ def M2_vrcmpyi_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyi($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16748,7 +16823,7 @@ def M2_vrcmpyi_s0c : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyi($Rss32,$Rtt32*)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -16758,7 +16833,7 @@ def M2_vrcmpyr_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyr($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16768,7 +16843,7 @@ def M2_vrcmpyr_s0c : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyr($Rss32,$Rtt32*)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -16778,7 +16853,7 @@ def M2_vrcmpys_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += vrcmpys($Rss32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM> {
+tc_d773585a, TypeM> {
let isPseudo = 1;
let Constraints = "$Rxx32 = $Rxx32in";
}
@@ -16786,7 +16861,7 @@ def M2_vrcmpys_acc_s1_h : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -16798,7 +16873,7 @@ def M2_vrcmpys_acc_s1_l : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -16810,14 +16885,14 @@ def M2_vrcmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vrcmpys($Rss32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM> {
+tc_bafaade3, TypeM> {
let isPseudo = 1;
}
def M2_vrcmpys_s1_h : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -16828,7 +16903,7 @@ def M2_vrcmpys_s1_l : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -16839,7 +16914,7 @@ def M2_vrcmpys_s1rp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = vrcmpys($Rss32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM> {
+tc_bafaade3, TypeM> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -16848,7 +16923,7 @@ def M2_vrcmpys_s1rp_h : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:hi",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001101;
@@ -16861,7 +16936,7 @@ def M2_vrcmpys_s1rp_l : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:lo",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001101;
@@ -16874,7 +16949,7 @@ def M2_vrmac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16885,7 +16960,7 @@ def M2_vrmpy_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16895,7 +16970,7 @@ def M2_xor_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= xor($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111100;
@@ -16909,7 +16984,7 @@ def M4_and_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= and($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -16923,7 +16998,7 @@ def M4_and_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= and($Rs32,~$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -16937,7 +17012,7 @@ def M4_and_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= or($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -16951,7 +17026,7 @@ def M4_and_xor : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= xor($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -16965,7 +17040,7 @@ def M4_cmpyi_wh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyiwh($Rss32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28 {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -16978,7 +17053,7 @@ def M4_cmpyi_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -16991,7 +17066,7 @@ def M4_cmpyr_wh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyrwh($Rss32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28 {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17004,7 +17079,7 @@ def M4_cmpyr_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17017,7 +17092,7 @@ def M4_mac_up_s1_sat : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -17032,7 +17107,7 @@ def M4_mpyri_addi : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii, IntRegs:$Rs32, u6_0Imm:$II),
"$Rd32 = add(#$Ii,mpyi($Rs32,#$II))",
-tc_16d0d8d5, TypeALU64>, Enc_322e1b, ImmRegRel {
+tc_05d3a09b, TypeALU64>, Enc_322e1b, ImmRegRel {
let Inst{31-24} = 0b11011000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17048,7 +17123,7 @@ def M4_mpyri_addr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Ru32, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rd32 = add($Ru32,mpyi($Rs32,#$Ii))",
-tc_16d0d8d5, TypeALU64>, Enc_420cf3, ImmRegRel {
+tc_05d3a09b, TypeALU64>, Enc_420cf3, ImmRegRel {
let Inst{31-23} = 0b110111111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17065,7 +17140,7 @@ def M4_mpyri_addr_u2 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Ru32, u6_2Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = add($Ru32,mpyi(#$Ii,$Rs32))",
-tc_bcc96cee, TypeALU64>, Enc_277737 {
+tc_1a2fd869, TypeALU64>, Enc_277737 {
let Inst{31-23} = 0b110111110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17075,7 +17150,7 @@ def M4_mpyrr_addi : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add(#$Ii,mpyi($Rs32,$Rt32))",
-tc_e913dc32, TypeALU64>, Enc_a7b8e8, ImmRegRel {
+tc_d773585a, TypeALU64>, Enc_a7b8e8, ImmRegRel {
let Inst{31-23} = 0b110101110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17092,7 +17167,7 @@ def M4_mpyrr_addr : HInst<
(outs IntRegs:$Ry32),
(ins IntRegs:$Ru32, IntRegs:$Ry32in, IntRegs:$Rs32),
"$Ry32 = add($Ru32,mpyi($Ry32in,$Rs32))",
-tc_e913dc32, TypeM>, Enc_7f1a05, ImmRegRel {
+tc_d773585a, TypeM>, Enc_7f1a05, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100011000;
@@ -17107,7 +17182,7 @@ def M4_nac_up_s1_sat : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -17122,7 +17197,7 @@ def M4_or_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= and($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -17136,7 +17211,7 @@ def M4_or_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= and($Rs32,~$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -17150,7 +17225,7 @@ def M4_or_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= or($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17164,7 +17239,7 @@ def M4_or_xor : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= xor($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17178,7 +17253,7 @@ def M4_pmpyw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = pmpyw($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -17188,7 +17263,7 @@ def M4_pmpyw_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 ^= pmpyw($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -17199,7 +17274,7 @@ def M4_vpmpyh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vpmpyh($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101110;
@@ -17209,7 +17284,7 @@ def M4_vpmpyh_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 ^= vpmpyh($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111101;
@@ -17220,7 +17295,7 @@ def M4_vrmpyeh_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyweh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -17231,7 +17306,7 @@ def M4_vrmpyeh_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyweh($Rss32,$Rtt32):<<1",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -17242,7 +17317,7 @@ def M4_vrmpyeh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyweh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -17252,7 +17327,7 @@ def M4_vrmpyeh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyweh($Rss32,$Rtt32):<<1",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -17262,7 +17337,7 @@ def M4_vrmpyoh_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpywoh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -17273,7 +17348,7 @@ def M4_vrmpyoh_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpywoh($Rss32,$Rtt32):<<1",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -17284,7 +17359,7 @@ def M4_vrmpyoh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpywoh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -17294,7 +17369,7 @@ def M4_vrmpyoh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpywoh($Rss32,$Rtt32):<<1",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17304,7 +17379,7 @@ def M4_xor_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= and($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17318,7 +17393,7 @@ def M4_xor_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= and($Rs32,~$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -17332,7 +17407,7 @@ def M4_xor_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= or($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17346,7 +17421,7 @@ def M4_xor_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 ^= xor($Rss32,$Rtt32)",
-tc_84df2cd3, TypeS_3op>, Enc_88c16c {
+tc_f429765c, TypeS_3op>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001010100;
@@ -17357,7 +17432,7 @@ def M5_vdmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c, Requires<[HasV5]> {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -17369,7 +17444,7 @@ def M5_vdmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825, Requires<[HasV5]> {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17380,7 +17455,7 @@ def M5_vmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpybsu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -17391,7 +17466,7 @@ def M5_vmacbuu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpybu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -17402,7 +17477,7 @@ def M5_vmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpybsu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -17412,7 +17487,7 @@ def M5_vmpybuu : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpybu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -17422,7 +17497,7 @@ def M5_vrmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpybsu($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -17433,7 +17508,7 @@ def M5_vrmacbuu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpybu($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -17444,7 +17519,7 @@ def M5_vrmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpybsu($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -17454,7 +17529,7 @@ def M5_vrmpybuu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpybu($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -17464,7 +17539,7 @@ def M6_vabsdiffb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffb($Rtt32,$Rss32)",
-tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
+tc_9461ff31, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -17474,7 +17549,7 @@ def M6_vabsdiffub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffub($Rtt32,$Rss32)",
-tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
+tc_9461ff31, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17484,7 +17559,7 @@ def PS_loadrbabs : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memb(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17507,7 +17582,7 @@ def PS_loadrdabs : HInst<
(outs DoubleRegs:$Rdd32),
(ins u29_3Imm:$Ii),
"$Rdd32 = memd(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_509701, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_509701, AddrModeRel {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17528,7 +17603,7 @@ def PS_loadrhabs : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memh(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17551,7 +17626,7 @@ def PS_loadriabs : HInst<
(outs IntRegs:$Rd32),
(ins u30_2Imm:$Ii),
"$Rd32 = memw(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17574,7 +17649,7 @@ def PS_loadrubabs : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memub(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17597,7 +17672,7 @@ def PS_loadruhabs : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memuh(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17620,7 +17695,7 @@ def PS_storerbabs : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Rt32),
"memb(#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
let Inst{24-21} = 0b0000;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17642,7 +17717,7 @@ def PS_storerbnewabs : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Nt8),
"memb(#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_ad1831, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_ad1831, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17668,7 +17743,7 @@ def PS_storerdabs : HInst<
(outs),
(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd(#$Ii) = $Rtt32",
-tc_a788683e, TypeV2LDST>, Enc_5c124a, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_5c124a, AddrModeRel {
let Inst{24-21} = 0b0110;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17689,7 +17764,7 @@ def PS_storerfabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(#$Ii) = $Rt32.h",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17710,7 +17785,7 @@ def PS_storerhabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17732,7 +17807,7 @@ def PS_storerhnewabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Nt8),
"memh(#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17758,7 +17833,7 @@ def PS_storeriabs : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Rt32),
"memw(#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_541f26, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_541f26, AddrModeRel {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17780,7 +17855,7 @@ def PS_storerinewabs : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Nt8),
"memw(#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17806,7 +17881,7 @@ def S2_addasl_rrri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32, u3_0Imm:$Ii),
"$Rd32 = addasl($Rt32,$Rs32,#$Ii)",
-tc_c74f796f, TypeS_3op>, Enc_47ef61 {
+tc_f675fee8, TypeS_3op>, Enc_47ef61 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000100000;
let hasNewValue = 1;
@@ -17817,7 +17892,7 @@ def S2_allocframe : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u11_3Imm:$Ii),
"allocframe($Rx32,#$Ii):raw",
-tc_e216a5db, TypeST>, Enc_22c845 {
+tc_b44ecf75, TypeST>, Enc_22c845 {
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10100000100;
let hasNewValue = 1;
@@ -17833,7 +17908,7 @@ def S2_asl_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asl($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_5eac98 {
+tc_946df596, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000000000;
}
@@ -17841,7 +17916,7 @@ def S2_asl_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += asl($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -17851,7 +17926,7 @@ def S2_asl_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= asl($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -17861,7 +17936,7 @@ def S2_asl_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= asl($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -17871,7 +17946,7 @@ def S2_asl_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= asl($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -17881,7 +17956,7 @@ def S2_asl_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= asl($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -17891,7 +17966,7 @@ def S2_asl_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asl($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -17902,7 +17977,7 @@ def S2_asl_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += asl($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -17915,7 +17990,7 @@ def S2_asl_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= asl($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -17928,7 +18003,7 @@ def S2_asl_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= asl($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -17941,7 +18016,7 @@ def S2_asl_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= asl($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -17954,7 +18029,7 @@ def S2_asl_i_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asl($Rs32,#$Ii):sat",
-tc_b44c6e2a, TypeS_2op>, Enc_a05677 {
+tc_779080bf, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100010;
@@ -17967,7 +18042,7 @@ def S2_asl_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= asl($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -17980,7 +18055,7 @@ def S2_asl_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vaslh($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_12b6e9 {
+tc_946df596, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -17989,7 +18064,7 @@ def S2_asl_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vaslw($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_7e5a82 {
+tc_946df596, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -17998,7 +18073,7 @@ def S2_asl_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = asl($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18007,7 +18082,7 @@ def S2_asl_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += asl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18018,7 +18093,7 @@ def S2_asl_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= asl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18029,7 +18104,7 @@ def S2_asl_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= asl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18040,7 +18115,7 @@ def S2_asl_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= asl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18051,7 +18126,7 @@ def S2_asl_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= asl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18062,7 +18137,7 @@ def S2_asl_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asl($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18073,7 +18148,7 @@ def S2_asl_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += asl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18086,7 +18161,7 @@ def S2_asl_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= asl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18099,7 +18174,7 @@ def S2_asl_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= asl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18112,7 +18187,7 @@ def S2_asl_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= asl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18125,7 +18200,7 @@ def S2_asl_r_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asl($Rs32,$Rt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_5ab2be {
+tc_779080bf, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110000;
@@ -18138,7 +18213,7 @@ def S2_asl_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vaslh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18147,7 +18222,7 @@ def S2_asl_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vaslw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18156,7 +18231,7 @@ def S2_asr_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asr($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_5eac98 {
+tc_946df596, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000000000;
}
@@ -18164,7 +18239,7 @@ def S2_asr_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += asr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18174,7 +18249,7 @@ def S2_asr_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= asr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18184,7 +18259,7 @@ def S2_asr_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= asr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18194,7 +18269,7 @@ def S2_asr_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= asr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18204,7 +18279,7 @@ def S2_asr_i_p_rnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asr($Rss32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Enc_5eac98, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18213,14 +18288,14 @@ def S2_asr_i_p_rnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asrrnd($Rss32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op> {
let isPseudo = 1;
}
def S2_asr_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asr($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -18231,7 +18306,7 @@ def S2_asr_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += asr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -18244,7 +18319,7 @@ def S2_asr_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= asr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -18257,7 +18332,7 @@ def S2_asr_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= asr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -18270,7 +18345,7 @@ def S2_asr_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= asr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -18283,7 +18358,7 @@ def S2_asr_i_r_rnd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asr($Rs32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100010;
@@ -18295,7 +18370,7 @@ def S2_asr_i_r_rnd_goodsyntax : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asrrnd($Rs32,#$Ii)",
-tc_2b6f77c6, TypeS_2op> {
+tc_002cb246, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -18304,7 +18379,7 @@ def S2_asr_i_svw_trun : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rd32 = vasrw($Rss32,#$Ii)",
-tc_1b9c9ee5, TypeS_2op>, Enc_8dec2e {
+tc_4414d8b1, TypeS_2op>, Enc_8dec2e {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001000110;
@@ -18316,7 +18391,7 @@ def S2_asr_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_12b6e9 {
+tc_946df596, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -18325,7 +18400,7 @@ def S2_asr_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vasrw($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_7e5a82 {
+tc_946df596, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -18334,7 +18409,7 @@ def S2_asr_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = asr($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18343,7 +18418,7 @@ def S2_asr_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += asr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18354,7 +18429,7 @@ def S2_asr_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= asr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18365,7 +18440,7 @@ def S2_asr_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= asr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18376,7 +18451,7 @@ def S2_asr_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= asr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18387,7 +18462,7 @@ def S2_asr_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= asr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18398,7 +18473,7 @@ def S2_asr_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asr($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18409,7 +18484,7 @@ def S2_asr_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += asr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18422,7 +18497,7 @@ def S2_asr_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= asr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18435,7 +18510,7 @@ def S2_asr_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= asr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18448,7 +18523,7 @@ def S2_asr_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= asr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18461,7 +18536,7 @@ def S2_asr_r_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asr($Rs32,$Rt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_5ab2be {
+tc_779080bf, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110000;
@@ -18474,7 +18549,7 @@ def S2_asr_r_svw_trun : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = vasrw($Rss32,$Rt32)",
-tc_1b9c9ee5, TypeS_3op>, Enc_3d5b28 {
+tc_4414d8b1, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -18486,7 +18561,7 @@ def S2_asr_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vasrh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18495,7 +18570,7 @@ def S2_asr_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vasrw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18504,7 +18579,7 @@ def S2_brev : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = brev($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18515,7 +18590,7 @@ def S2_brevp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = brev($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_b9c5fb {
+tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18524,7 +18599,7 @@ def S2_cabacdecbin : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = decbin($Rss32,$Rtt32)",
-tc_c6ebf8dd, TypeS_3op>, Enc_a56825 {
+tc_76851da1, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -18536,7 +18611,7 @@ def S2_cl0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = cl0($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18547,7 +18622,7 @@ def S2_cl0p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = cl0($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18558,7 +18633,7 @@ def S2_cl1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = cl1($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18569,7 +18644,7 @@ def S2_cl1p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = cl1($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18580,7 +18655,7 @@ def S2_clb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = clb($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18591,7 +18666,7 @@ def S2_clbnorm : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = normamt($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18602,7 +18677,7 @@ def S2_clbp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = clb($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18613,7 +18688,7 @@ def S2_clrbit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = clrbit($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -18624,7 +18699,7 @@ def S2_clrbit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = clrbit($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -18635,7 +18710,7 @@ def S2_ct0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = ct0($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18646,7 +18721,7 @@ def S2_ct0p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = ct0($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -18657,7 +18732,7 @@ def S2_ct1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = ct1($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18668,7 +18743,7 @@ def S2_ct1p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = ct1($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -18679,7 +18754,7 @@ def S2_deinterleave : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = deinterleave($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_b9c5fb {
+tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18688,7 +18763,7 @@ def S2_extractu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rd32 = extractu($Rs32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b388cf {
+tc_f675fee8, TypeS_2op>, Enc_b388cf {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011010;
let hasNewValue = 1;
@@ -18699,7 +18774,7 @@ def S2_extractu_rp : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rd32 = extractu($Rs32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_e07374 {
+tc_002cb246, TypeS_3op>, Enc_e07374 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001001000;
@@ -18711,7 +18786,7 @@ def S2_extractup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rdd32 = extractu($Rss32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b84c4c {
+tc_f675fee8, TypeS_2op>, Enc_b84c4c {
let Inst{31-24} = 0b10000001;
let prefersSlot3 = 1;
}
@@ -18719,7 +18794,7 @@ def S2_extractup_rp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = extractu($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -18729,7 +18804,7 @@ def S2_insert : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = insert($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op>, Enc_a1e29d {
+tc_bfec0f01, TypeS_2op>, Enc_a1e29d {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011110;
let hasNewValue = 1;
@@ -18741,7 +18816,7 @@ def S2_insert_rp : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rx32 = insert($Rs32,$Rtt32)",
-tc_84df2cd3, TypeS_3op>, Enc_179b35 {
+tc_f429765c, TypeS_3op>, Enc_179b35 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001000000;
@@ -18754,7 +18829,7 @@ def S2_insertp : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rxx32 = insert($Rss32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op>, Enc_143a3c {
+tc_bfec0f01, TypeS_2op>, Enc_143a3c {
let Inst{31-24} = 0b10000011;
let prefersSlot3 = 1;
let Constraints = "$Rxx32 = $Rxx32in";
@@ -18763,7 +18838,7 @@ def S2_insertp_rp : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 = insert($Rss32,$Rtt32)",
-tc_84df2cd3, TypeS_3op>, Enc_88c16c {
+tc_f429765c, TypeS_3op>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001010000;
@@ -18774,7 +18849,7 @@ def S2_interleave : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = interleave($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_b9c5fb {
+tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18783,7 +18858,7 @@ def S2_lfsp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = lfs($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -18793,7 +18868,7 @@ def S2_lsl_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = lsl($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18802,7 +18877,7 @@ def S2_lsl_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += lsl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18813,7 +18888,7 @@ def S2_lsl_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= lsl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18824,7 +18899,7 @@ def S2_lsl_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= lsl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18835,7 +18910,7 @@ def S2_lsl_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= lsl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18846,7 +18921,7 @@ def S2_lsl_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= lsl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18857,7 +18932,7 @@ def S2_lsl_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = lsl($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18868,7 +18943,7 @@ def S2_lsl_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += lsl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18881,7 +18956,7 @@ def S2_lsl_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= lsl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18894,7 +18969,7 @@ def S2_lsl_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= lsl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18907,7 +18982,7 @@ def S2_lsl_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= lsl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18920,7 +18995,7 @@ def S2_lsl_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlslh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18929,7 +19004,7 @@ def S2_lsl_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlslw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18938,7 +19013,7 @@ def S2_lsr_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = lsr($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_5eac98 {
+tc_946df596, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000000000;
}
@@ -18946,7 +19021,7 @@ def S2_lsr_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += lsr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18956,7 +19031,7 @@ def S2_lsr_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= lsr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18966,7 +19041,7 @@ def S2_lsr_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= lsr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18976,7 +19051,7 @@ def S2_lsr_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= lsr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18986,7 +19061,7 @@ def S2_lsr_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= lsr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -18996,7 +19071,7 @@ def S2_lsr_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = lsr($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -19007,7 +19082,7 @@ def S2_lsr_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += lsr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -19020,7 +19095,7 @@ def S2_lsr_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= lsr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -19033,7 +19108,7 @@ def S2_lsr_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= lsr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -19046,7 +19121,7 @@ def S2_lsr_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= lsr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -19059,7 +19134,7 @@ def S2_lsr_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= lsr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -19072,7 +19147,7 @@ def S2_lsr_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vlsrh($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_12b6e9 {
+tc_946df596, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b001;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -19081,7 +19156,7 @@ def S2_lsr_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vlsrw($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_7e5a82 {
+tc_946df596, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -19090,7 +19165,7 @@ def S2_lsr_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = lsr($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -19099,7 +19174,7 @@ def S2_lsr_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += lsr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -19110,7 +19185,7 @@ def S2_lsr_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= lsr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -19121,7 +19196,7 @@ def S2_lsr_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= lsr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -19132,7 +19207,7 @@ def S2_lsr_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= lsr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -19143,7 +19218,7 @@ def S2_lsr_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= lsr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -19154,7 +19229,7 @@ def S2_lsr_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = lsr($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -19165,7 +19240,7 @@ def S2_lsr_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += lsr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -19178,7 +19253,7 @@ def S2_lsr_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= lsr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -19191,7 +19266,7 @@ def S2_lsr_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= lsr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -19204,7 +19279,7 @@ def S2_lsr_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= lsr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -19217,7 +19292,7 @@ def S2_lsr_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlsrh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -19226,16 +19301,28 @@ def S2_lsr_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlsrw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
}
+def S2_mask : HInst<
+(outs IntRegs:$Rd32),
+(ins u5_0Imm:$Ii, u5_0Imm:$II),
+"$Rd32 = mask(#$Ii,#$II)",
+tc_9461ff31, TypeS_2op>, Enc_c85e2a, Requires<[HasV66]> {
+let Inst{13-13} = 0b1;
+let Inst{20-16} = 0b00000;
+let Inst{31-23} = 0b100011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
def S2_packhl : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = packhl($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_be32a5 {
+tc_5a2711e5, TypeALU32_3op>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110101100;
@@ -19245,7 +19332,7 @@ def S2_parityp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = parity($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeALU64>, Enc_d2216a {
+tc_002cb246, TypeALU64>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010000000;
@@ -19257,7 +19344,7 @@ def S2_pstorerbf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100000;
let isPredicated = 1;
@@ -19279,7 +19366,7 @@ def S2_pstorerbf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_cc449f, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19297,7 +19384,7 @@ def S2_pstorerbf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19305,7 +19392,7 @@ def S2_pstorerbfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_cc449f, AddrModeRel {
+tc_53559e35, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19324,7 +19411,7 @@ def S2_pstorerbnewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000100101;
@@ -19350,7 +19437,7 @@ def S2_pstorerbnewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b100;
@@ -19372,7 +19459,7 @@ def S2_pstorerbnewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19381,7 +19468,7 @@ def S2_pstorerbnewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -19404,7 +19491,7 @@ def S2_pstorerbnewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000000101;
@@ -19429,7 +19516,7 @@ def S2_pstorerbnewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b100;
@@ -19450,7 +19537,7 @@ def S2_pstorerbnewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19459,7 +19546,7 @@ def S2_pstorerbnewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -19481,7 +19568,7 @@ def S2_pstorerbt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000000;
let isPredicated = 1;
@@ -19502,7 +19589,7 @@ def S2_pstorerbt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_cc449f, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19519,7 +19606,7 @@ def S2_pstorerbt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19527,7 +19614,7 @@ def S2_pstorerbtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_cc449f, AddrModeRel {
+tc_53559e35, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19545,7 +19632,7 @@ def S2_pstorerdf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32+#$Ii) = $Rtt32",
-tc_8b15472a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100110;
let isPredicated = 1;
@@ -19566,7 +19653,7 @@ def S2_pstorerdf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rx32++#$Ii) = $Rtt32",
-tc_cd7374a0, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19584,7 +19671,7 @@ def S2_pstorerdf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32) = $Rtt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19592,7 +19679,7 @@ def S2_pstorerdfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
-tc_74e47fd9, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_53559e35, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19611,7 +19698,7 @@ def S2_pstorerdt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32+#$Ii) = $Rtt32",
-tc_8b15472a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000110;
let isPredicated = 1;
@@ -19631,7 +19718,7 @@ def S2_pstorerdt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rx32++#$Ii) = $Rtt32",
-tc_cd7374a0, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19648,7 +19735,7 @@ def S2_pstorerdt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32) = $Rtt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19656,7 +19743,7 @@ def S2_pstorerdtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
-tc_74e47fd9, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_53559e35, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19674,7 +19761,7 @@ def S2_pstorerff_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32.h",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100011;
let isPredicated = 1;
@@ -19695,7 +19782,7 @@ def S2_pstorerff_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32.h",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19713,7 +19800,7 @@ def S2_pstorerff_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32) = $Rt32.h",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19721,7 +19808,7 @@ def S2_pstorerffnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19740,7 +19827,7 @@ def S2_pstorerft_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+#$Ii) = $Rt32.h",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000011;
let isPredicated = 1;
@@ -19760,7 +19847,7 @@ def S2_pstorerft_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rx32++#$Ii) = $Rt32.h",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19777,7 +19864,7 @@ def S2_pstorerft_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32) = $Rt32.h",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19785,7 +19872,7 @@ def S2_pstorerftnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19803,7 +19890,7 @@ def S2_pstorerhf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100010;
let isPredicated = 1;
@@ -19825,7 +19912,7 @@ def S2_pstorerhf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19843,7 +19930,7 @@ def S2_pstorerhf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19851,7 +19938,7 @@ def S2_pstorerhfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19870,7 +19957,7 @@ def S2_pstorerhnewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000100101;
@@ -19896,7 +19983,7 @@ def S2_pstorerhnewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b101;
@@ -19918,7 +20005,7 @@ def S2_pstorerhnewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19927,7 +20014,7 @@ def S2_pstorerhnewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -19950,7 +20037,7 @@ def S2_pstorerhnewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000000101;
@@ -19975,7 +20062,7 @@ def S2_pstorerhnewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b101;
@@ -19996,7 +20083,7 @@ def S2_pstorerhnewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20005,7 +20092,7 @@ def S2_pstorerhnewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -20027,7 +20114,7 @@ def S2_pstorerht_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000010;
let isPredicated = 1;
@@ -20048,7 +20135,7 @@ def S2_pstorerht_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20065,7 +20152,7 @@ def S2_pstorerht_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20073,7 +20160,7 @@ def S2_pstorerhtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20091,7 +20178,7 @@ def S2_pstorerif_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100100;
let isPredicated = 1;
@@ -20113,7 +20200,7 @@ def S2_pstorerif_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20131,7 +20218,7 @@ def S2_pstorerif_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20139,7 +20226,7 @@ def S2_pstorerifnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_53559e35, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20159,7 +20246,7 @@ def S2_pstorerinewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000100101;
@@ -20185,7 +20272,7 @@ def S2_pstorerinewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_65f095, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b110;
@@ -20207,7 +20294,7 @@ def S2_pstorerinewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20216,7 +20303,7 @@ def S2_pstorerinewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_65f095, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -20239,7 +20326,7 @@ def S2_pstorerinewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000000101;
@@ -20264,7 +20351,7 @@ def S2_pstorerinewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_65f095, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b110;
@@ -20285,7 +20372,7 @@ def S2_pstorerinewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20294,7 +20381,7 @@ def S2_pstorerinewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_65f095, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -20316,7 +20403,7 @@ def S2_pstorerit_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000100;
let isPredicated = 1;
@@ -20337,7 +20424,7 @@ def S2_pstorerit_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20354,7 +20441,7 @@ def S2_pstorerit_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20362,7 +20449,7 @@ def S2_pstoreritnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_53559e35, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20380,7 +20467,7 @@ def S2_setbit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = setbit($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -20391,7 +20478,7 @@ def S2_setbit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = setbit($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -20402,7 +20489,7 @@ def S2_shuffeb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = shuffeb($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20411,7 +20498,7 @@ def S2_shuffeh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = shuffeh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20420,7 +20507,7 @@ def S2_shuffob : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = shuffob($Rtt32,$Rss32)",
-tc_540fdfbc, TypeS_3op>, Enc_ea23e4 {
+tc_946df596, TypeS_3op>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20429,7 +20516,7 @@ def S2_shuffoh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = shuffoh($Rtt32,$Rss32)",
-tc_540fdfbc, TypeS_3op>, Enc_ea23e4 {
+tc_946df596, TypeS_3op>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -20438,7 +20525,7 @@ def S2_storerb_io : HInst<
(outs),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) = $Rt32",
-tc_05b6c987, TypeST>, Enc_448f7f, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_448f7f, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20459,9 +20546,10 @@ def S2_storerb_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++$Mu2:brev) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111000;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayStore = 1;
let BaseOpcode = "S2_storerb_pbr";
@@ -20472,7 +20560,7 @@ def S2_storerb_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_9fdb5406, TypeST>, Enc_b15941, AddrModeRel {
+tc_e86aa961, TypeST>, Enc_b15941, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001000;
@@ -20488,7 +20576,7 @@ def S2_storerb_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++I:circ($Mu2)) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001000;
let addrMode = PostInc;
@@ -20503,7 +20591,7 @@ def S2_storerb_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rx32++#$Ii) = $Rt32",
-tc_f86c328a, TypeST>, Enc_10bc21, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_10bc21, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20521,7 +20609,7 @@ def S2_storerb_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++$Mu2) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101000;
let addrMode = PostInc;
@@ -20534,7 +20622,7 @@ def S2_storerb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) = $Rt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20542,7 +20630,7 @@ def S2_storerbgp : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Rt32),
"memb(gp+#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
let Inst{24-21} = 0b0000;
let Inst{31-27} = 0b01001;
let accessSize = ByteAccess;
@@ -20560,7 +20648,7 @@ def S2_storerbnew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rs32+#$Ii) = $Nt8.new",
-tc_f7dd9c9f, TypeST>, Enc_4df4e9, AddrModeRel {
+tc_be9602ff, TypeST>, Enc_4df4e9, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -20585,10 +20673,11 @@ def S2_storerbnew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++$Mu2:brev) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101111101;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let isNVStore = 1;
let isNewValue = 1;
@@ -20602,7 +20691,7 @@ def S2_storerbnew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_9d5941c7, TypeST>, Enc_96ce4f, AddrModeRel {
+tc_d5c0729a, TypeST>, Enc_96ce4f, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b00;
@@ -20622,7 +20711,7 @@ def S2_storerbnew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101001101;
@@ -20641,7 +20730,7 @@ def S2_storerbnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rx32++#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_c7cd90, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_c7cd90, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b000;
@@ -20662,7 +20751,7 @@ def S2_storerbnew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++$Mu2) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85 {
+tc_c79a189f, TypeST>, Enc_8dbe85 {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101101101;
@@ -20679,7 +20768,7 @@ def S2_storerbnew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memb($Rs32) = $Nt8.new",
-tc_f7dd9c9f, TypeMAPPING> {
+tc_be9602ff, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -20688,7 +20777,7 @@ def S2_storerbnewgp : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Nt8),
"memb(gp+#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_ad1831, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_ad1831, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -20710,7 +20799,7 @@ def S2_storerd_io : HInst<
(outs),
(ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rs32+#$Ii) = $Rtt32",
-tc_05b6c987, TypeST>, Enc_ce6828, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_ce6828, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20730,9 +20819,10 @@ def S2_storerd_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++$Mu2:brev) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_928ca1 {
+tc_da97ee82, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111110;
+let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let mayStore = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -20741,7 +20831,7 @@ def S2_storerd_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++#$Ii:circ($Mu2)) = $Rtt32",
-tc_9fdb5406, TypeST>, Enc_395cc4 {
+tc_e86aa961, TypeST>, Enc_395cc4 {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001110;
@@ -20755,7 +20845,7 @@ def S2_storerd_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++I:circ($Mu2)) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_928ca1 {
+tc_da97ee82, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001110;
let addrMode = PostInc;
@@ -20768,7 +20858,7 @@ def S2_storerd_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rx32++#$Ii) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_85bf58, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_85bf58, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20785,7 +20875,7 @@ def S2_storerd_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++$Mu2) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_928ca1 {
+tc_da97ee82, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101110;
let addrMode = PostInc;
@@ -20797,7 +20887,7 @@ def S2_storerd_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"memd($Rs32) = $Rtt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20805,7 +20895,7 @@ def S2_storerdgp : HInst<
(outs),
(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd(gp+#$Ii) = $Rtt32",
-tc_a788683e, TypeV2LDST>, Enc_5c124a, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_5c124a, AddrModeRel {
let Inst{24-21} = 0b0110;
let Inst{31-27} = 0b01001;
let accessSize = DoubleWordAccess;
@@ -20822,7 +20912,7 @@ def S2_storerf_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) = $Rt32.h",
-tc_05b6c987, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20842,9 +20932,10 @@ def S2_storerf_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2:brev) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111011;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -20853,7 +20944,7 @@ def S2_storerf_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32.h",
-tc_9fdb5406, TypeST>, Enc_935d9b {
+tc_e86aa961, TypeST>, Enc_935d9b {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001011;
@@ -20867,7 +20958,7 @@ def S2_storerf_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++I:circ($Mu2)) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001011;
let addrMode = PostInc;
@@ -20880,7 +20971,7 @@ def S2_storerf_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rx32++#$Ii) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20897,7 +20988,7 @@ def S2_storerf_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101011;
let addrMode = PostInc;
@@ -20909,7 +21000,7 @@ def S2_storerf_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) = $Rt32.h",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20917,7 +21008,7 @@ def S2_storerfgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(gp+#$Ii) = $Rt32.h",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b01001;
let accessSize = HalfWordAccess;
@@ -20934,7 +21025,7 @@ def S2_storerh_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) = $Rt32",
-tc_05b6c987, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20955,9 +21046,10 @@ def S2_storerh_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2:brev) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111010;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
let BaseOpcode = "S2_storerh_pbr";
@@ -20968,7 +21060,7 @@ def S2_storerh_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_9fdb5406, TypeST>, Enc_935d9b, AddrModeRel {
+tc_e86aa961, TypeST>, Enc_935d9b, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001010;
@@ -20984,7 +21076,7 @@ def S2_storerh_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++I:circ($Mu2)) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001010;
let addrMode = PostInc;
@@ -20999,7 +21091,7 @@ def S2_storerh_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rx32++#$Ii) = $Rt32",
-tc_f86c328a, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -21017,7 +21109,7 @@ def S2_storerh_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101010;
let addrMode = PostInc;
@@ -21030,7 +21122,7 @@ def S2_storerh_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) = $Rt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -21038,7 +21130,7 @@ def S2_storerhgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(gp+#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b01001;
let accessSize = HalfWordAccess;
@@ -21056,7 +21148,7 @@ def S2_storerhnew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Nt8),
"memh($Rs32+#$Ii) = $Nt8.new",
-tc_f7dd9c9f, TypeST>, Enc_0d8870, AddrModeRel {
+tc_be9602ff, TypeST>, Enc_0d8870, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -21081,10 +21173,11 @@ def S2_storerhnew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++$Mu2:brev) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101111101;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let isNVStore = 1;
let isNewValue = 1;
@@ -21098,7 +21191,7 @@ def S2_storerhnew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_9d5941c7, TypeST>, Enc_91b9fe, AddrModeRel {
+tc_d5c0729a, TypeST>, Enc_91b9fe, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b01;
@@ -21118,7 +21211,7 @@ def S2_storerhnew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101001101;
@@ -21137,7 +21230,7 @@ def S2_storerhnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"memh($Rx32++#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_e26546, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_e26546, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b001;
@@ -21158,7 +21251,7 @@ def S2_storerhnew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++$Mu2) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85 {
+tc_c79a189f, TypeST>, Enc_8dbe85 {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101101101;
@@ -21175,7 +21268,7 @@ def S2_storerhnew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memh($Rs32) = $Nt8.new",
-tc_f7dd9c9f, TypeMAPPING> {
+tc_be9602ff, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -21184,7 +21277,7 @@ def S2_storerhnewgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Nt8),
"memh(gp+#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -21206,7 +21299,7 @@ def S2_storeri_io : HInst<
(outs),
(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) = $Rt32",
-tc_05b6c987, TypeST>, Enc_143445, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_143445, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -21227,9 +21320,10 @@ def S2_storeri_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++$Mu2:brev) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111100;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayStore = 1;
let BaseOpcode = "S2_storeri_pbr";
@@ -21240,7 +21334,7 @@ def S2_storeri_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_9fdb5406, TypeST>, Enc_79b8c8, AddrModeRel {
+tc_e86aa961, TypeST>, Enc_79b8c8, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001100;
@@ -21256,7 +21350,7 @@ def S2_storeri_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++I:circ($Mu2)) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001100;
let addrMode = PostInc;
@@ -21271,7 +21365,7 @@ def S2_storeri_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rx32++#$Ii) = $Rt32",
-tc_f86c328a, TypeST>, Enc_db40cd, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_db40cd, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -21289,7 +21383,7 @@ def S2_storeri_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++$Mu2) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101100;
let addrMode = PostInc;
@@ -21302,7 +21396,7 @@ def S2_storeri_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) = $Rt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -21310,7 +21404,7 @@ def S2_storerigp : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Rt32),
"memw(gp+#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_541f26, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_541f26, AddrModeRel {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b01001;
let accessSize = WordAccess;
@@ -21328,7 +21422,7 @@ def S2_storerinew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Nt8),
"memw($Rs32+#$Ii) = $Nt8.new",
-tc_f7dd9c9f, TypeST>, Enc_690862, AddrModeRel {
+tc_be9602ff, TypeST>, Enc_690862, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -21353,10 +21447,11 @@ def S2_storerinew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++$Mu2:brev) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101111101;
+let addrMode = PostInc;
let accessSize = WordAccess;
let isNVStore = 1;
let isNewValue = 1;
@@ -21370,7 +21465,7 @@ def S2_storerinew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_9d5941c7, TypeST>, Enc_3f97c8, AddrModeRel {
+tc_d5c0729a, TypeST>, Enc_3f97c8, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b10;
@@ -21390,7 +21485,7 @@ def S2_storerinew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101001101;
@@ -21409,7 +21504,7 @@ def S2_storerinew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"memw($Rx32++#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_223005, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_223005, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b010;
@@ -21429,7 +21524,7 @@ def S2_storerinew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++$Mu2) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85 {
+tc_c79a189f, TypeST>, Enc_8dbe85 {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101101101;
@@ -21446,7 +21541,7 @@ def S2_storerinew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memw($Rs32) = $Nt8.new",
-tc_f7dd9c9f, TypeMAPPING> {
+tc_be9602ff, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -21455,7 +21550,7 @@ def S2_storerinewgp : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Nt8),
"memw(gp+#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -21477,7 +21572,7 @@ def S2_storew_locked : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw_locked($Rs32,$Pd4) = $Rt32",
-tc_1372bca1, TypeST>, Enc_c2b48e {
+tc_5abb5e3f, TypeST>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100000101;
@@ -21490,7 +21585,7 @@ def S2_svsathb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsathb($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -21501,7 +21596,7 @@ def S2_svsathub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsathub($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -21512,7 +21607,7 @@ def S2_tableidxb : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxb($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011100;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21523,7 +21618,7 @@ def S2_tableidxb_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxb($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21534,7 +21629,7 @@ def S2_tableidxd : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxd($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21545,7 +21640,7 @@ def S2_tableidxd_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxd($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21555,7 +21650,7 @@ def S2_tableidxh : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxh($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21566,7 +21661,7 @@ def S2_tableidxh_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxh($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21576,7 +21671,7 @@ def S2_tableidxw : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxw($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21587,7 +21682,7 @@ def S2_tableidxw_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxw($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21597,7 +21692,7 @@ def S2_togglebit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = togglebit($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -21608,7 +21703,7 @@ def S2_togglebit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = togglebit($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -21619,7 +21714,7 @@ def S2_tstbit_i : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = tstbit($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64 {
+tc_643b4717, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101000;
@@ -21628,7 +21723,7 @@ def S2_tstbit_r : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = tstbit($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111000;
@@ -21637,7 +21732,7 @@ def S2_valignib : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, u3_0Imm:$Ii),
"$Rdd32 = valignb($Rtt32,$Rss32,#$Ii)",
-tc_f8eeed7a, TypeS_3op>, Enc_729ff7 {
+tc_b4b5c03a, TypeS_3op>, Enc_729ff7 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000000000;
}
@@ -21645,7 +21740,7 @@ def S2_valignrb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, PredRegs:$Pu4),
"$Rdd32 = valignb($Rtt32,$Rss32,$Pu4)",
-tc_f8eeed7a, TypeS_3op>, Enc_8c6530 {
+tc_b4b5c03a, TypeS_3op>, Enc_8c6530 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010000;
@@ -21654,7 +21749,7 @@ def S2_vcnegh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vcnegh($Rss32,$Rt32)",
-tc_b44c6e2a, TypeS_3op>, Enc_927852 {
+tc_779080bf, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011110;
@@ -21665,7 +21760,7 @@ def S2_vcrotate : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vcrotate($Rss32,$Rt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_927852 {
+tc_002cb246, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011110;
@@ -21676,7 +21771,7 @@ def S2_vrcnegh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += vrcnegh($Rss32,$Rt32)",
-tc_e913dc32, TypeS_3op>, Enc_1aa186 {
+tc_d773585a, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -21687,7 +21782,7 @@ def S2_vrndpackwh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vrndwh($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21698,7 +21793,7 @@ def S2_vrndpackwhs : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vrndwh($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_90cd8b {
+tc_cf8126ae, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21710,7 +21805,7 @@ def S2_vsathb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsathb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21721,7 +21816,7 @@ def S2_vsathb_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsathb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21730,7 +21825,7 @@ def S2_vsathub : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsathub($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21741,7 +21836,7 @@ def S2_vsathub_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsathub($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21750,7 +21845,7 @@ def S2_vsatwh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsatwh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21761,7 +21856,7 @@ def S2_vsatwh_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsatwh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21770,7 +21865,7 @@ def S2_vsatwuh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsatwuh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21781,7 +21876,7 @@ def S2_vsatwuh_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsatwuh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21790,7 +21885,7 @@ def S2_vsplatrb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsplatb($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -21802,7 +21897,7 @@ def S2_vsplatrh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsplath($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100010;
let isReMaterializable = 1;
@@ -21812,7 +21907,7 @@ def S2_vspliceib : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, u3_0Imm:$Ii),
"$Rdd32 = vspliceb($Rss32,$Rtt32,#$Ii)",
-tc_f8eeed7a, TypeS_3op>, Enc_d50cd3 {
+tc_b4b5c03a, TypeS_3op>, Enc_d50cd3 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000000100;
}
@@ -21820,7 +21915,7 @@ def S2_vsplicerb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Pu4),
"$Rdd32 = vspliceb($Rss32,$Rtt32,$Pu4)",
-tc_f8eeed7a, TypeS_3op>, Enc_dbd70c {
+tc_b4b5c03a, TypeS_3op>, Enc_dbd70c {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010100;
@@ -21829,7 +21924,7 @@ def S2_vsxtbh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsxtbh($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21839,7 +21934,7 @@ def S2_vsxthw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsxthw($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21849,7 +21944,7 @@ def S2_vtrunehb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vtrunehb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21859,7 +21954,7 @@ def S2_vtrunewh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunewh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -21868,7 +21963,7 @@ def S2_vtrunohb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vtrunohb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21878,7 +21973,7 @@ def S2_vtrunowh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunowh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -21887,7 +21982,7 @@ def S2_vzxtbh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vzxtbh($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21897,7 +21992,7 @@ def S2_vzxthw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vzxthw($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21907,7 +22002,7 @@ def S4_addaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Ru32, s32_0Imm:$Ii),
"$Rd32 = add($Rs32,add($Ru32,#$Ii))",
-tc_c74f796f, TypeALU64>, Enc_8b8d61 {
+tc_f675fee8, TypeALU64>, Enc_8b8d61 {
let Inst{31-23} = 0b110110110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21922,7 +22017,7 @@ def S4_addi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = add(#$Ii,asl($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b100;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -21940,7 +22035,7 @@ def S4_addi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = add(#$Ii,lsr($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b100;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -21958,7 +22053,7 @@ def S4_andi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = and(#$Ii,asl($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b000;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -21976,7 +22071,7 @@ def S4_andi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = and(#$Ii,lsr($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b000;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -21994,7 +22089,7 @@ def S4_clbaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s6_0Imm:$Ii),
"$Rd32 = add(clb($Rs32),#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_9fae8a {
+tc_002cb246, TypeS_2op>, Enc_9fae8a {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10001100001;
let hasNewValue = 1;
@@ -22005,7 +22100,7 @@ def S4_clbpaddi : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, s6_0Imm:$Ii),
"$Rd32 = add(clb($Rss32),#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_a1640c {
+tc_002cb246, TypeS_2op>, Enc_a1640c {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -22016,7 +22111,7 @@ def S4_clbpnorm : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = normamt($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -22027,7 +22122,7 @@ def S4_extract : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rd32 = extract($Rs32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b388cf {
+tc_f675fee8, TypeS_2op>, Enc_b388cf {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011011;
let hasNewValue = 1;
@@ -22038,7 +22133,7 @@ def S4_extract_rp : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rd32 = extract($Rs32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_e07374 {
+tc_002cb246, TypeS_3op>, Enc_e07374 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001001000;
@@ -22050,7 +22145,7 @@ def S4_extractp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rdd32 = extract($Rss32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b84c4c {
+tc_f675fee8, TypeS_2op>, Enc_b84c4c {
let Inst{31-24} = 0b10001010;
let prefersSlot3 = 1;
}
@@ -22058,7 +22153,7 @@ def S4_extractp_rp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = extract($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -22068,7 +22163,7 @@ def S4_lsli : HInst<
(outs IntRegs:$Rd32),
(ins s6_0Imm:$Ii, IntRegs:$Rt32),
"$Rd32 = lsl(#$Ii,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_fef969 {
+tc_946df596, TypeS_3op>, Enc_fef969 {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -22079,7 +22174,7 @@ def S4_ntstbit_i : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = !tstbit($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64 {
+tc_643b4717, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101001;
@@ -22088,7 +22183,7 @@ def S4_ntstbit_r : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !tstbit($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111001;
@@ -22097,7 +22192,7 @@ def S4_or_andi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 |= and($Rs32,#$Ii)",
-tc_84df2cd3, TypeALU64>, Enc_b0e9d8 {
+tc_f429765c, TypeALU64>, Enc_b0e9d8 {
let Inst{31-22} = 0b1101101000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22114,7 +22209,7 @@ def S4_or_andix : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Ru32, IntRegs:$Rx32in, s32_0Imm:$Ii),
"$Rx32 = or($Ru32,and($Rx32in,#$Ii))",
-tc_84df2cd3, TypeALU64>, Enc_b4e6cf {
+tc_f429765c, TypeALU64>, Enc_b4e6cf {
let Inst{31-22} = 0b1101101001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22130,7 +22225,7 @@ def S4_or_ori : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 |= or($Rs32,#$Ii)",
-tc_84df2cd3, TypeALU64>, Enc_b0e9d8 {
+tc_f429765c, TypeALU64>, Enc_b0e9d8 {
let Inst{31-22} = 0b1101101010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22147,7 +22242,7 @@ def S4_ori_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = or(#$Ii,asl($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b010;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -22165,7 +22260,7 @@ def S4_ori_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = or(#$Ii,lsr($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b010;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -22183,7 +22278,7 @@ def S4_parity : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = parity($Rs32,$Rt32)",
-tc_2b6f77c6, TypeALU64>, Enc_5ab2be {
+tc_002cb246, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101111;
@@ -22195,7 +22290,7 @@ def S4_pstorerbf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22220,7 +22315,7 @@ def S4_pstorerbf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22236,7 +22331,7 @@ def S4_pstorerbfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22262,7 +22357,7 @@ def S4_pstorerbfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110000;
let isPredicated = 1;
@@ -22285,7 +22380,7 @@ def S4_pstorerbfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22302,7 +22397,7 @@ def S4_pstorerbfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22310,7 +22405,7 @@ def S4_pstorerbnewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b000;
@@ -22338,7 +22433,7 @@ def S4_pstorerbnewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -22358,7 +22453,7 @@ def S4_pstorerbnewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -22387,7 +22482,7 @@ def S4_pstorerbnewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000110101;
@@ -22414,7 +22509,7 @@ def S4_pstorerbnewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -22435,7 +22530,7 @@ def S4_pstorerbnewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -22444,7 +22539,7 @@ def S4_pstorerbnewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b000;
@@ -22471,7 +22566,7 @@ def S4_pstorerbnewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -22490,7 +22585,7 @@ def S4_pstorerbnewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -22518,7 +22613,7 @@ def S4_pstorerbnewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000010101;
@@ -22544,7 +22639,7 @@ def S4_pstorerbnewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -22564,7 +22659,7 @@ def S4_pstorerbnewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -22573,7 +22668,7 @@ def S4_pstorerbt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22597,7 +22692,7 @@ def S4_pstorerbt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100000;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22612,7 +22707,7 @@ def S4_pstorerbtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22637,7 +22732,7 @@ def S4_pstorerbtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010000;
let isPredicated = 1;
@@ -22659,7 +22754,7 @@ def S4_pstorerbtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110000;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22675,7 +22770,7 @@ def S4_pstorerbtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22683,7 +22778,7 @@ def S4_pstorerdf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd(#$Ii) = $Rtt32",
-tc_238d91d2, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_362c6592, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22707,7 +22802,7 @@ def S4_pstorerdf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_5274e61a, TypeST>, Enc_1a9974, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110101110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22722,7 +22817,7 @@ def S4_pstorerdfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd(#$Ii) = $Rtt32",
-tc_66888ded, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22747,7 +22842,7 @@ def S4_pstorerdfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
-tc_f86c328a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110110;
let isPredicated = 1;
@@ -22769,7 +22864,7 @@ def S4_pstorerdfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_3e07fb90, TypeST>, Enc_1a9974, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110111110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22785,7 +22880,7 @@ def S4_pstorerdfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32) = $Rtt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22793,7 +22888,7 @@ def S4_pstorerdt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd(#$Ii) = $Rtt32",
-tc_238d91d2, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_362c6592, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22816,7 +22911,7 @@ def S4_pstorerdt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_5274e61a, TypeST>, Enc_1a9974, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110100110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22830,7 +22925,7 @@ def S4_pstorerdtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd(#$Ii) = $Rtt32",
-tc_66888ded, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22854,7 +22949,7 @@ def S4_pstorerdtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
-tc_f86c328a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010110;
let isPredicated = 1;
@@ -22875,7 +22970,7 @@ def S4_pstorerdtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_3e07fb90, TypeST>, Enc_1a9974, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110110110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22890,7 +22985,7 @@ def S4_pstorerdtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32) = $Rtt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22898,7 +22993,7 @@ def S4_pstorerff_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh(#$Ii) = $Rt32.h",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22922,7 +23017,7 @@ def S4_pstorerff_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22937,7 +23032,7 @@ def S4_pstorerffnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh(#$Ii) = $Rt32.h",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22962,7 +23057,7 @@ def S4_pstorerffnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110011;
let isPredicated = 1;
@@ -22984,7 +23079,7 @@ def S4_pstorerffnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23000,7 +23095,7 @@ def S4_pstorerffnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32) = $Rt32.h",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23008,7 +23103,7 @@ def S4_pstorerft_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh(#$Ii) = $Rt32.h",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23031,7 +23126,7 @@ def S4_pstorerft_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100011;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23045,7 +23140,7 @@ def S4_pstorerftnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh(#$Ii) = $Rt32.h",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23069,7 +23164,7 @@ def S4_pstorerftnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010011;
let isPredicated = 1;
@@ -23090,7 +23185,7 @@ def S4_pstorerftnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110011;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23105,7 +23200,7 @@ def S4_pstorerftnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32) = $Rt32.h",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23113,7 +23208,7 @@ def S4_pstorerhf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23138,7 +23233,7 @@ def S4_pstorerhf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23154,7 +23249,7 @@ def S4_pstorerhfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23180,7 +23275,7 @@ def S4_pstorerhfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110010;
let isPredicated = 1;
@@ -23203,7 +23298,7 @@ def S4_pstorerhfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23220,7 +23315,7 @@ def S4_pstorerhfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23228,7 +23323,7 @@ def S4_pstorerhnewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b001;
@@ -23256,7 +23351,7 @@ def S4_pstorerhnewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -23276,7 +23371,7 @@ def S4_pstorerhnewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -23305,7 +23400,7 @@ def S4_pstorerhnewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000110101;
@@ -23332,7 +23427,7 @@ def S4_pstorerhnewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -23353,7 +23448,7 @@ def S4_pstorerhnewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23362,7 +23457,7 @@ def S4_pstorerhnewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b001;
@@ -23389,7 +23484,7 @@ def S4_pstorerhnewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -23408,7 +23503,7 @@ def S4_pstorerhnewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -23436,7 +23531,7 @@ def S4_pstorerhnewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000010101;
@@ -23462,7 +23557,7 @@ def S4_pstorerhnewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -23482,7 +23577,7 @@ def S4_pstorerhnewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23491,7 +23586,7 @@ def S4_pstorerht_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23515,7 +23610,7 @@ def S4_pstorerht_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100010;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23530,7 +23625,7 @@ def S4_pstorerhtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23555,7 +23650,7 @@ def S4_pstorerhtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010010;
let isPredicated = 1;
@@ -23577,7 +23672,7 @@ def S4_pstorerhtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110010;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23593,7 +23688,7 @@ def S4_pstorerhtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23601,7 +23696,7 @@ def S4_pstorerif_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23626,7 +23721,7 @@ def S4_pstorerif_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23642,7 +23737,7 @@ def S4_pstorerifnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23668,7 +23763,7 @@ def S4_pstorerifnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110100;
let isPredicated = 1;
@@ -23691,7 +23786,7 @@ def S4_pstorerifnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23708,7 +23803,7 @@ def S4_pstorerifnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23716,7 +23811,7 @@ def S4_pstorerinewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b010;
@@ -23744,7 +23839,7 @@ def S4_pstorerinewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -23764,7 +23859,7 @@ def S4_pstorerinewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -23793,7 +23888,7 @@ def S4_pstorerinewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000110101;
@@ -23820,7 +23915,7 @@ def S4_pstorerinewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -23841,7 +23936,7 @@ def S4_pstorerinewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23850,7 +23945,7 @@ def S4_pstorerinewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b010;
@@ -23877,7 +23972,7 @@ def S4_pstorerinewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -23896,7 +23991,7 @@ def S4_pstorerinewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -23924,7 +24019,7 @@ def S4_pstorerinewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000010101;
@@ -23950,7 +24045,7 @@ def S4_pstorerinewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -23970,7 +24065,7 @@ def S4_pstorerinewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23979,7 +24074,7 @@ def S4_pstorerit_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -24003,7 +24098,7 @@ def S4_pstorerit_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100100;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -24018,7 +24113,7 @@ def S4_pstoreritnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -24043,7 +24138,7 @@ def S4_pstoreritnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010100;
let isPredicated = 1;
@@ -24065,7 +24160,7 @@ def S4_pstoreritnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110100;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -24081,7 +24176,7 @@ def S4_pstoreritnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24089,7 +24184,7 @@ def S4_stored_locked : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"memd_locked($Rs32,$Pd4) = $Rtt32",
-tc_1372bca1, TypeST>, Enc_d7dc10 {
+tc_5abb5e3f, TypeST>, Enc_d7dc10 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100000111;
@@ -24102,7 +24197,7 @@ def S4_storeirb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"memb($Rs32+#$Ii) = #$II",
-tc_05b6c987, TypeST>, Enc_8203bb, PredNewRel {
+tc_b83e6d73, TypeST>, Enc_8203bb, PredNewRel {
let Inst{31-21} = 0b00111100000;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -24121,7 +24216,7 @@ def S4_storeirb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memb($Rs32) = #$II",
-tc_05b6c987, TypeMAPPING> {
+tc_b83e6d73, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24129,7 +24224,7 @@ def S4_storeirbf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memb($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_0b2be201, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111000100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24149,7 +24244,7 @@ def S4_storeirbf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memb($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24157,7 +24252,7 @@ def S4_storeirbfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memb($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111001100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24178,7 +24273,7 @@ def S4_storeirbfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memb($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24186,7 +24281,7 @@ def S4_storeirbt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memb($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_0b2be201, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111000000;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24205,7 +24300,7 @@ def S4_storeirbt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memb($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24213,7 +24308,7 @@ def S4_storeirbtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memb($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111001000;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24233,7 +24328,7 @@ def S4_storeirbtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memb($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24241,7 +24336,7 @@ def S4_storeirh_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"memh($Rs32+#$Ii) = #$II",
-tc_05b6c987, TypeST>, Enc_a803e0, PredNewRel {
+tc_b83e6d73, TypeST>, Enc_a803e0, PredNewRel {
let Inst{31-21} = 0b00111100001;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
@@ -24260,7 +24355,7 @@ def S4_storeirh_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memh($Rs32) = #$II",
-tc_05b6c987, TypeMAPPING> {
+tc_b83e6d73, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24268,7 +24363,7 @@ def S4_storeirhf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memh($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_f20719, PredNewRel {
+tc_0b2be201, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24288,7 +24383,7 @@ def S4_storeirhf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memh($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24296,7 +24391,7 @@ def S4_storeirhfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memh($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_f20719, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111001101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24317,7 +24412,7 @@ def S4_storeirhfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memh($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24325,7 +24420,7 @@ def S4_storeirht_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memh($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_f20719, PredNewRel {
+tc_0b2be201, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111000001;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24344,7 +24439,7 @@ def S4_storeirht_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memh($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24352,7 +24447,7 @@ def S4_storeirhtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memh($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_f20719, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111001001;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24372,7 +24467,7 @@ def S4_storeirhtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memh($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24380,7 +24475,7 @@ def S4_storeiri_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"memw($Rs32+#$Ii) = #$II",
-tc_05b6c987, TypeST>, Enc_f37377, PredNewRel {
+tc_b83e6d73, TypeST>, Enc_f37377, PredNewRel {
let Inst{31-21} = 0b00111100010;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -24399,7 +24494,7 @@ def S4_storeiri_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memw($Rs32) = #$II",
-tc_05b6c987, TypeMAPPING> {
+tc_b83e6d73, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24407,7 +24502,7 @@ def S4_storeirif_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memw($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_0b2be201, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111000110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24427,7 +24522,7 @@ def S4_storeirif_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memw($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24435,7 +24530,7 @@ def S4_storeirifnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memw($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111001110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24456,7 +24551,7 @@ def S4_storeirifnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memw($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24464,7 +24559,7 @@ def S4_storeirit_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memw($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_0b2be201, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111000010;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24483,7 +24578,7 @@ def S4_storeirit_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memw($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24491,7 +24586,7 @@ def S4_storeiritnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memw($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111001010;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24511,7 +24606,7 @@ def S4_storeiritnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memw($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24519,7 +24614,7 @@ def S4_storerb_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memb($Re32=#$II) = $Rt32",
-tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011000;
@@ -24540,7 +24635,7 @@ def S4_storerb_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011000;
let addrMode = BaseRegOffset;
@@ -24556,7 +24651,7 @@ def S4_storerb_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memb($Ru32<<#$Ii+#$II) = $Rt32",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101000;
let addrMode = BaseLongOffset;
@@ -24578,7 +24673,7 @@ def S4_storerbnew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memb($Re32=#$II) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10101011101;
@@ -24602,7 +24697,7 @@ def S4_storerbnew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel {
+tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0000;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24621,7 +24716,7 @@ def S4_storerbnew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memb($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel {
+tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101101101;
@@ -24646,7 +24741,7 @@ def S4_storerd_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, DoubleRegs:$Rtt32),
"memd($Re32=#$II) = $Rtt32",
-tc_66888ded, TypeST>, Enc_c7a204 {
+tc_da4a37ed, TypeST>, Enc_c7a204 {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011110;
@@ -24666,7 +24761,7 @@ def S4_storerd_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_d9709180, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011110;
let addrMode = BaseRegOffset;
@@ -24681,7 +24776,7 @@ def S4_storerd_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, DoubleRegs:$Rtt32),
"memd($Ru32<<#$Ii+#$II) = $Rtt32",
-tc_0dc560de, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101110;
let addrMode = BaseLongOffset;
@@ -24702,7 +24797,7 @@ def S4_storerf_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memh($Re32=#$II) = $Rt32.h",
-tc_66888ded, TypeST>, Enc_8bcba4 {
+tc_da4a37ed, TypeST>, Enc_8bcba4 {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011011;
@@ -24722,7 +24817,7 @@ def S4_storerf_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011011;
let addrMode = BaseRegOffset;
@@ -24737,7 +24832,7 @@ def S4_storerf_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memh($Ru32<<#$Ii+#$II) = $Rt32.h",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101011;
let addrMode = BaseLongOffset;
@@ -24758,7 +24853,7 @@ def S4_storerh_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memh($Re32=#$II) = $Rt32",
-tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011010;
@@ -24779,7 +24874,7 @@ def S4_storerh_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011010;
let addrMode = BaseRegOffset;
@@ -24795,7 +24890,7 @@ def S4_storerh_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memh($Ru32<<#$Ii+#$II) = $Rt32",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101010;
let addrMode = BaseLongOffset;
@@ -24817,7 +24912,7 @@ def S4_storerhnew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memh($Re32=#$II) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b001;
let Inst{31-21} = 0b10101011101;
@@ -24841,7 +24936,7 @@ def S4_storerhnew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel {
+tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0001;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24860,7 +24955,7 @@ def S4_storerhnew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memh($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel {
+tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101101101;
@@ -24885,7 +24980,7 @@ def S4_storeri_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memw($Re32=#$II) = $Rt32",
-tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011100;
@@ -24906,7 +25001,7 @@ def S4_storeri_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011100;
let addrMode = BaseRegOffset;
@@ -24922,7 +25017,7 @@ def S4_storeri_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memw($Ru32<<#$Ii+#$II) = $Rt32",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101100;
let addrMode = BaseLongOffset;
@@ -24944,7 +25039,7 @@ def S4_storerinew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memw($Re32=#$II) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b010;
let Inst{31-21} = 0b10101011101;
@@ -24968,7 +25063,7 @@ def S4_storerinew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel {
+tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0010;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24987,7 +25082,7 @@ def S4_storerinew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memw($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel {
+tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101101101;
@@ -25012,7 +25107,7 @@ def S4_subaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Ru32),
"$Rd32 = add($Rs32,sub(#$Ii,$Ru32))",
-tc_c74f796f, TypeALU64>, Enc_8b8d61 {
+tc_f675fee8, TypeALU64>, Enc_8b8d61 {
let Inst{31-23} = 0b110110111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25027,7 +25122,7 @@ def S4_subi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = sub(#$Ii,asl($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b110;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -25045,7 +25140,7 @@ def S4_subi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = sub(#$Ii,lsr($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b110;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -25063,7 +25158,7 @@ def S4_vrcrotate : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rdd32 = vrcrotate($Rss32,$Rt32,#$Ii)",
-tc_b9c0b731, TypeS_3op>, Enc_645d54 {
+tc_13bfbcf9, TypeS_3op>, Enc_645d54 {
let Inst{7-6} = 0b11;
let Inst{31-21} = 0b11000011110;
let prefersSlot3 = 1;
@@ -25072,7 +25167,7 @@ def S4_vrcrotate_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rxx32 += vrcrotate($Rss32,$Rt32,#$Ii)",
-tc_60571023, TypeS_3op>, Enc_b72622 {
+tc_9debc299, TypeS_3op>, Enc_b72622 {
let Inst{7-6} = 0b00;
let Inst{31-21} = 0b11001011101;
let prefersSlot3 = 1;
@@ -25082,7 +25177,7 @@ def S4_vxaddsubh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25093,7 +25188,7 @@ def S4_vxaddsubhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubh($Rss32,$Rtt32):rnd:>>1:sat",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -25104,7 +25199,7 @@ def S4_vxaddsubw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubw($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25115,7 +25210,7 @@ def S4_vxsubaddh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25126,7 +25221,7 @@ def S4_vxsubaddhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddh($Rss32,$Rtt32):rnd:>>1:sat",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -25137,7 +25232,7 @@ def S4_vxsubaddw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddw($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25148,7 +25243,7 @@ def S5_asrhub_rnd_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):raw",
-tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_11a146 {
let Inst{7-5} = 0b100;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@@ -25161,7 +25256,7 @@ def S5_asrhub_rnd_sat_goodsyntax : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -25170,7 +25265,7 @@ def S5_asrhub_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):sat",
-tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_11a146 {
let Inst{7-5} = 0b101;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@@ -25183,7 +25278,7 @@ def S5_popcountp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = popcount($Rss32)",
-tc_00afc57e, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_703e822c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -25194,7 +25289,7 @@ def S5_vasrhrnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):raw",
-tc_2b6f77c6, TypeS_2op>, Enc_12b6e9, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000001;
@@ -25204,14 +25299,14 @@ def S5_vasrhrnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op> {
let isPseudo = 1;
}
def S6_allocframe_to_raw : HInst<
(outs),
(ins u11_3Imm:$Ii),
"allocframe(#$Ii)",
-tc_e216a5db, TypeMAPPING>, Requires<[HasV65]> {
+tc_b44ecf75, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -25219,7 +25314,7 @@ def S6_rol_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = rol($Rss32,#$Ii)",
-tc_55050d58, TypeS_2op>, Enc_5eac98, Requires<[HasV60]> {
+tc_1fc97744, TypeS_2op>, Enc_5eac98, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000000000;
}
@@ -25227,7 +25322,7 @@ def S6_rol_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -25237,7 +25332,7 @@ def S6_rol_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -25247,7 +25342,7 @@ def S6_rol_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -25257,7 +25352,7 @@ def S6_rol_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -25267,7 +25362,7 @@ def S6_rol_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -25277,7 +25372,7 @@ def S6_rol_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = rol($Rs32,#$Ii)",
-tc_55050d58, TypeS_2op>, Enc_a05677, Requires<[HasV60]> {
+tc_1fc97744, TypeS_2op>, Enc_a05677, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -25288,7 +25383,7 @@ def S6_rol_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -25301,7 +25396,7 @@ def S6_rol_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -25314,7 +25409,7 @@ def S6_rol_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -25327,7 +25422,7 @@ def S6_rol_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -25340,7 +25435,7 @@ def S6_rol_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -25353,7 +25448,7 @@ def S6_vsplatrbp : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsplatb($Rs32)",
-tc_be706f30, TypeS_2op>, Enc_3a3d62, Requires<[HasV62]> {
+tc_a1c00888, TypeS_2op>, Enc_3a3d62, Requires<[HasV62]> {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100010;
}
@@ -25361,7 +25456,7 @@ def S6_vtrunehb_ppp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunehb($Rss32,$Rtt32)",
-tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
+tc_1fc97744, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -25370,7 +25465,7 @@ def S6_vtrunohb_ppp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunohb($Rss32,$Rtt32)",
-tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
+tc_1fc97744, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -25379,7 +25474,7 @@ def SA1_addi : HInst<
(outs GeneralSubRegs:$Rx16),
(ins IntRegs:$Rx16in, s32_0Imm:$Ii),
"$Rx16 = add($Rx16in,#$Ii)",
-tc_609d2efe, TypeSUBINSN>, Enc_93af4c {
+tc_0a705168, TypeSUBINSN>, Enc_93af4c {
let Inst{12-11} = 0b00;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25396,7 +25491,7 @@ def SA1_addrx : HInst<
(outs GeneralSubRegs:$Rx16),
(ins IntRegs:$Rx16in, GeneralSubRegs:$Rs16),
"$Rx16 = add($Rx16in,$Rs16)",
-tc_609d2efe, TypeSUBINSN>, Enc_0527db {
+tc_0a705168, TypeSUBINSN>, Enc_0527db {
let Inst{12-8} = 0b11000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25408,7 +25503,7 @@ def SA1_addsp : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u6_2Imm:$Ii),
"$Rd16 = add(r29,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_2df31d {
+tc_9fc3dae0, TypeSUBINSN>, Enc_2df31d {
let Inst{12-10} = 0b011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25420,7 +25515,7 @@ def SA1_and1 : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = and($Rs16,#1)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25431,7 +25526,7 @@ def SA1_clrf : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (!p0) $Rd16 = #0",
-tc_1b82a277, TypeSUBINSN>, Enc_1f5ba6 {
+tc_a1123dda, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25445,7 +25540,7 @@ def SA1_clrfnew : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (!p0.new) $Rd16 = #0",
-tc_e9c822f7, TypeSUBINSN>, Enc_1f5ba6 {
+tc_8b3e402a, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25460,7 +25555,7 @@ def SA1_clrt : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (p0) $Rd16 = #0",
-tc_1b82a277, TypeSUBINSN>, Enc_1f5ba6 {
+tc_a1123dda, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100110;
let isPredicated = 1;
let hasNewValue = 1;
@@ -25473,7 +25568,7 @@ def SA1_clrtnew : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (p0.new) $Rd16 = #0",
-tc_e9c822f7, TypeSUBINSN>, Enc_1f5ba6 {
+tc_8b3e402a, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -25487,7 +25582,7 @@ def SA1_cmpeqi : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u2_0Imm:$Ii),
"p0 = cmp.eq($Rs16,#$Ii)",
-tc_90f3e30c, TypeSUBINSN>, Enc_63eaeb {
+tc_5b7c0967, TypeSUBINSN>, Enc_63eaeb {
let Inst{3-2} = 0b00;
let Inst{12-8} = 0b11001;
let AsmVariantName = "NonParsable";
@@ -25498,7 +25593,7 @@ def SA1_combine0i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#0,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b00;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25510,7 +25605,7 @@ def SA1_combine1i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#1,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b01;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25522,7 +25617,7 @@ def SA1_combine2i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#2,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b10;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25534,7 +25629,7 @@ def SA1_combine3i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#3,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b11;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25546,7 +25641,7 @@ def SA1_combinerz : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins GeneralSubRegs:$Rs16),
"$Rdd8 = combine($Rs16,#0)",
-tc_a904d137, TypeSUBINSN>, Enc_399e12 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_399e12 {
let Inst{3-3} = 0b1;
let Inst{12-8} = 0b11101;
let hasNewValue = 1;
@@ -25558,7 +25653,7 @@ def SA1_combinezr : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins GeneralSubRegs:$Rs16),
"$Rdd8 = combine(#0,$Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_399e12 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_399e12 {
let Inst{3-3} = 0b0;
let Inst{12-8} = 0b11101;
let hasNewValue = 1;
@@ -25570,7 +25665,7 @@ def SA1_dec : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, n1Const:$n1),
"$Rd16 = add($Rs16,#$n1)",
-tc_609d2efe, TypeSUBINSN>, Enc_ee5ed0 {
+tc_0a705168, TypeSUBINSN>, Enc_ee5ed0 {
let Inst{12-8} = 0b10011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25581,7 +25676,7 @@ def SA1_inc : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = add($Rs16,#1)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25592,7 +25687,7 @@ def SA1_seti : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u32_0Imm:$Ii),
"$Rd16 = #$Ii",
-tc_a904d137, TypeSUBINSN>, Enc_e39bb2 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_e39bb2 {
let Inst{12-10} = 0b010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25608,7 +25703,7 @@ def SA1_setin1 : HInst<
(outs GeneralSubRegs:$Rd16),
(ins n1Const:$n1),
"$Rd16 = #$n1",
-tc_a904d137, TypeSUBINSN>, Enc_7a0ea6 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_7a0ea6 {
let Inst{12-4} = 0b110100000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25619,7 +25714,7 @@ def SA1_sxtb : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = sxtb($Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25630,7 +25725,7 @@ def SA1_sxth : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = sxth($Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10100;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25641,7 +25736,7 @@ def SA1_tfr : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = $Rs16",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25652,7 +25747,7 @@ def SA1_zxtb : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = and($Rs16,#255)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25663,7 +25758,7 @@ def SA1_zxth : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = zxth($Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25674,7 +25769,7 @@ def SL1_loadri_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"$Rd16 = memw($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_53dca9 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_53dca9 {
let Inst{12-12} = 0b0;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25688,7 +25783,7 @@ def SL1_loadrub_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"$Rd16 = memub($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_c175d0 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_c175d0 {
let Inst{12-12} = 0b1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25702,7 +25797,7 @@ def SL2_deallocframe : HInst<
(outs),
(ins),
"deallocframe",
-tc_36c68ad1, TypeSUBINSN>, Enc_e3b0c4 {
+tc_39dfefe8, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111100000000;
let accessSize = DoubleWordAccess;
let AsmVariantName = "NonParsable";
@@ -25715,7 +25810,7 @@ def SL2_jumpr31 : HInst<
(outs),
(ins),
"jumpr r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000000;
let isTerminator = 1;
let isIndirectBranch = 1;
@@ -25730,7 +25825,7 @@ def SL2_jumpr31_f : HInst<
(outs),
(ins),
"if (!p0) jumpr r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25748,7 +25843,7 @@ def SL2_jumpr31_fnew : HInst<
(outs),
(ins),
"if (!p0.new) jumpr:nt r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25767,7 +25862,7 @@ def SL2_jumpr31_t : HInst<
(outs),
(ins),
"if (p0) jumpr r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000100;
let isPredicated = 1;
let isTerminator = 1;
@@ -25784,7 +25879,7 @@ def SL2_jumpr31_tnew : HInst<
(outs),
(ins),
"if (p0.new) jumpr:nt r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000110;
let isPredicated = 1;
let isTerminator = 1;
@@ -25802,7 +25897,7 @@ def SL2_loadrb_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_0Imm:$Ii),
"$Rd16 = memb($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_2fbf3c {
+tc_17e0d2cd, TypeSUBINSN>, Enc_2fbf3c {
let Inst{12-11} = 0b10;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25816,7 +25911,7 @@ def SL2_loadrd_sp : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u5_3Imm:$Ii),
"$Rdd8 = memd(r29+#$Ii)",
-tc_9c98e8af, TypeSUBINSN>, Enc_86a14b {
+tc_c4db48cb, TypeSUBINSN>, Enc_86a14b {
let Inst{12-8} = 0b11110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25831,7 +25926,7 @@ def SL2_loadrh_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
"$Rd16 = memh($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_2bae10 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_2bae10 {
let Inst{12-11} = 0b00;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25845,7 +25940,7 @@ def SL2_loadri_sp : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u5_2Imm:$Ii),
"$Rd16 = memw(r29+#$Ii)",
-tc_9c98e8af, TypeSUBINSN>, Enc_51635c {
+tc_c4db48cb, TypeSUBINSN>, Enc_51635c {
let Inst{12-9} = 0b1110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25860,7 +25955,7 @@ def SL2_loadruh_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
"$Rd16 = memuh($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_2bae10 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_2bae10 {
let Inst{12-11} = 0b01;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25874,7 +25969,7 @@ def SL2_return : HInst<
(outs),
(ins),
"dealloc_return",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000000;
let isTerminator = 1;
let isIndirectBranch = 1;
@@ -25892,7 +25987,7 @@ def SL2_return_f : HInst<
(outs),
(ins),
"if (!p0) dealloc_return",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25913,7 +26008,7 @@ def SL2_return_fnew : HInst<
(outs),
(ins),
"if (!p0.new) dealloc_return:nt",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25935,7 +26030,7 @@ def SL2_return_t : HInst<
(outs),
(ins),
"if (p0) dealloc_return",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000100;
let isPredicated = 1;
let isTerminator = 1;
@@ -25955,7 +26050,7 @@ def SL2_return_tnew : HInst<
(outs),
(ins),
"if (p0.new) dealloc_return:nt",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000110;
let isPredicated = 1;
let isTerminator = 1;
@@ -25976,7 +26071,7 @@ def SS1_storeb_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii, GeneralSubRegs:$Rt16),
"memb($Rs16+#$Ii) = $Rt16",
-tc_05b6c987, TypeSUBINSN>, Enc_b38ffc {
+tc_30b9bb4a, TypeSUBINSN>, Enc_b38ffc {
let Inst{12-12} = 0b1;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -25988,7 +26083,7 @@ def SS1_storew_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii, GeneralSubRegs:$Rt16),
"memw($Rs16+#$Ii) = $Rt16",
-tc_05b6c987, TypeSUBINSN>, Enc_f55a0c {
+tc_30b9bb4a, TypeSUBINSN>, Enc_f55a0c {
let Inst{12-12} = 0b0;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26000,7 +26095,7 @@ def SS2_allocframe : HInst<
(outs),
(ins u5_3Imm:$Ii),
"allocframe(#$Ii)",
-tc_0fc1ae07, TypeSUBINSN>, Enc_6f70ca {
+tc_49a8207d, TypeSUBINSN>, Enc_6f70ca {
let Inst{3-0} = 0b0000;
let Inst{12-9} = 0b1110;
let addrMode = BaseImmOffset;
@@ -26015,7 +26110,7 @@ def SS2_storebi0 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"memb($Rs16+#$Ii) = #0",
-tc_57288781, TypeSUBINSN>, Enc_84d359 {
+tc_89e94ad3, TypeSUBINSN>, Enc_84d359 {
let Inst{12-8} = 0b10010;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -26027,7 +26122,7 @@ def SS2_storebi1 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"memb($Rs16+#$Ii) = #1",
-tc_57288781, TypeSUBINSN>, Enc_84d359 {
+tc_89e94ad3, TypeSUBINSN>, Enc_84d359 {
let Inst{12-8} = 0b10011;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -26039,7 +26134,7 @@ def SS2_stored_sp : HInst<
(outs),
(ins s6_3Imm:$Ii, GeneralDoubleLow8Regs:$Rtt8),
"memd(r29+#$Ii) = $Rtt8",
-tc_a788683e, TypeSUBINSN>, Enc_b8309d {
+tc_0371abea, TypeSUBINSN>, Enc_b8309d {
let Inst{12-9} = 0b0101;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
@@ -26052,7 +26147,7 @@ def SS2_storeh_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii, GeneralSubRegs:$Rt16),
"memh($Rs16+#$Ii) = $Rt16",
-tc_05b6c987, TypeSUBINSN>, Enc_625deb {
+tc_30b9bb4a, TypeSUBINSN>, Enc_625deb {
let Inst{12-11} = 0b00;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
@@ -26064,7 +26159,7 @@ def SS2_storew_sp : HInst<
(outs),
(ins u5_2Imm:$Ii, GeneralSubRegs:$Rt16),
"memw(r29+#$Ii) = $Rt16",
-tc_a788683e, TypeSUBINSN>, Enc_87c142 {
+tc_0371abea, TypeSUBINSN>, Enc_87c142 {
let Inst{12-9} = 0b0100;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26077,7 +26172,7 @@ def SS2_storewi0 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"memw($Rs16+#$Ii) = #0",
-tc_57288781, TypeSUBINSN>, Enc_a6ce9c {
+tc_89e94ad3, TypeSUBINSN>, Enc_a6ce9c {
let Inst{12-8} = 0b10000;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26089,7 +26184,7 @@ def SS2_storewi1 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"memw($Rs16+#$Ii) = #1",
-tc_57288781, TypeSUBINSN>, Enc_a6ce9c {
+tc_89e94ad3, TypeSUBINSN>, Enc_a6ce9c {
let Inst{12-8} = 0b10001;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26227,7 +26322,7 @@ def V6_extractw : HInst<
(outs IntRegs:$Rd32),
(ins HvxVR:$Vu32, IntRegs:$Rs32),
"$Rd32 = vextract($Vu32,$Rs32)",
-tc_9777e6bf, TypeLD>, Enc_50e578, Requires<[UseHVXV60]> {
+tc_540c3da3, TypeLD>, Enc_50e578, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10010010000;
@@ -26448,7 +26543,7 @@ def V6_lvsplatb : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32.b = vsplat($Rt32)",
-tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
+tc_c4edf264, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
@@ -26459,7 +26554,7 @@ def V6_lvsplath : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32.h = vsplat($Rt32)",
-tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
+tc_c4edf264, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
@@ -26470,7 +26565,7 @@ def V6_lvsplatw : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32 = vsplat($Rt32)",
-tc_6b78cf13, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[UseHVXV60]> {
+tc_c4edf264, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[UseHVXV60]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -26481,7 +26576,7 @@ def V6_pred_and : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = and($Qs4,$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26494,7 +26589,7 @@ def V6_pred_and_n : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = and($Qs4,!$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000101;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26507,7 +26602,7 @@ def V6_pred_not : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4),
"$Qd4 = not($Qs4)",
-tc_71337255, TypeCVI_VA>, Enc_bfbf03, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_bfbf03, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000010;
let Inst{13-10} = 0b0000;
let Inst{31-16} = 0b0001111000000011;
@@ -26519,7 +26614,7 @@ def V6_pred_or : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = or($Qs4,$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000001;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26532,7 +26627,7 @@ def V6_pred_or_n : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = or($Qs4,!$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000100;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26545,7 +26640,7 @@ def V6_pred_scalar2 : HInst<
(outs HvxQR:$Qd4),
(ins IntRegs:$Rt32),
"$Qd4 = vsetq($Rt32)",
-tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV60]> {
+tc_5bf8afbb, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV60]> {
let Inst{13-2} = 0b000000010001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -26556,7 +26651,7 @@ def V6_pred_scalar2v2 : HInst<
(outs HvxQR:$Qd4),
(ins IntRegs:$Rt32),
"$Qd4 = vsetq2($Rt32)",
-tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV62]> {
+tc_5bf8afbb, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV62]> {
let Inst{13-2} = 0b000000010011;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -26567,7 +26662,7 @@ def V6_pred_xor : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = xor($Qs4,$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000011;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26580,7 +26675,7 @@ def V6_shuffeqh : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4.b = vshuffe($Qs4.h,$Qt4.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
let Inst{7-2} = 0b000110;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26593,7 +26688,7 @@ def V6_shuffeqw : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4.h = vshuffe($Qs4.w,$Qt4.w)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
let Inst{7-2} = 0b000111;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26743,7 +26838,7 @@ def V6_vL32Ub_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmemu($Rt32+#$Ii)",
-tc_35e92f8e, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]> {
+tc_a7e6707d, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -26760,7 +26855,7 @@ def V6_vL32Ub_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmemu($Rx32++#$Ii)",
-tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]> {
+tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -26779,7 +26874,7 @@ def V6_vL32Ub_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32 = vmemu($Rx32++$Mu2)",
-tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_2ebe3b, Requires<[UseHVXV60]> {
+tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_2ebe3b, Requires<[UseHVXV60]> {
let Inst{12-5} = 0b00000111;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -26796,7 +26891,7 @@ def V6_vL32b_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmem($Rt32+#$Ii)",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -26816,7 +26911,7 @@ def V6_vL32b_cur_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.cur = vmem($Rt32+#$Ii)",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -26836,7 +26931,7 @@ def V6_vL32b_cur_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -26856,7 +26951,7 @@ def V6_vL32b_cur_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -26878,7 +26973,7 @@ def V6_vL32b_cur_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000101;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -26899,7 +26994,7 @@ def V6_vL32b_cur_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.cur = vmem($Rx32++#$Ii)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -26920,7 +27015,7 @@ def V6_vL32b_cur_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.cur = vmem($Rx32++$Mu2)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000001;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -26940,7 +27035,7 @@ def V6_vL32b_cur_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -26959,7 +27054,7 @@ def V6_vL32b_cur_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -26980,7 +27075,7 @@ def V6_vL32b_cur_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000100;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27000,7 +27095,7 @@ def V6_vL32b_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27019,7 +27114,7 @@ def V6_vL32b_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27040,7 +27135,7 @@ def V6_vL32b_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000011;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27060,7 +27155,7 @@ def V6_vL32b_nt_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmem($Rt32+#$Ii):nt",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27081,7 +27176,7 @@ def V6_vL32b_nt_cur_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.cur = vmem($Rt32+#$Ii):nt",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27102,7 +27197,7 @@ def V6_vL32b_nt_cur_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27123,7 +27218,7 @@ def V6_vL32b_nt_cur_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27146,7 +27241,7 @@ def V6_vL32b_nt_cur_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000101;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27168,7 +27263,7 @@ def V6_vL32b_nt_cur_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.cur = vmem($Rx32++#$Ii):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -27190,7 +27285,7 @@ def V6_vL32b_nt_cur_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.cur = vmem($Rx32++$Mu2):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000001;
let Inst{31-21} = 0b00101011010;
let hasNewValue = 1;
@@ -27211,7 +27306,7 @@ def V6_vL32b_nt_cur_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27231,7 +27326,7 @@ def V6_vL32b_nt_cur_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27253,7 +27348,7 @@ def V6_vL32b_nt_cur_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000100;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27274,7 +27369,7 @@ def V6_vL32b_nt_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27294,7 +27389,7 @@ def V6_vL32b_nt_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27316,7 +27411,7 @@ def V6_vL32b_nt_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000011;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27337,7 +27432,7 @@ def V6_vL32b_nt_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmem($Rx32++#$Ii):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -27359,7 +27454,7 @@ def V6_vL32b_nt_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32 = vmem($Rx32++$Mu2):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011010;
let hasNewValue = 1;
@@ -27380,7 +27475,7 @@ def V6_vL32b_nt_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27399,7 +27494,7 @@ def V6_vL32b_nt_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27420,7 +27515,7 @@ def V6_vL32b_nt_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000010;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27440,7 +27535,7 @@ def V6_vL32b_nt_tmp_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.tmp = vmem($Rt32+#$Ii):nt",
-tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27460,7 +27555,7 @@ def V6_vL32b_nt_tmp_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27480,7 +27575,7 @@ def V6_vL32b_nt_tmp_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27502,7 +27597,7 @@ def V6_vL32b_nt_tmp_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000111;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27523,7 +27618,7 @@ def V6_vL32b_nt_tmp_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.tmp = vmem($Rx32++#$Ii):nt",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -27544,7 +27639,7 @@ def V6_vL32b_nt_tmp_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.tmp = vmem($Rx32++$Mu2):nt",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000010;
let Inst{31-21} = 0b00101011010;
let hasNewValue = 1;
@@ -27564,7 +27659,7 @@ def V6_vL32b_nt_tmp_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27583,7 +27678,7 @@ def V6_vL32b_nt_tmp_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27604,7 +27699,7 @@ def V6_vL32b_nt_tmp_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000110;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27624,7 +27719,7 @@ def V6_vL32b_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmem($Rx32++#$Ii)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -27645,7 +27740,7 @@ def V6_vL32b_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32 = vmem($Rx32++$Mu2)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -27665,7 +27760,7 @@ def V6_vL32b_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27683,7 +27778,7 @@ def V6_vL32b_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27703,7 +27798,7 @@ def V6_vL32b_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000010;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27722,7 +27817,7 @@ def V6_vL32b_tmp_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.tmp = vmem($Rt32+#$Ii)",
-tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -27741,7 +27836,7 @@ def V6_vL32b_tmp_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27760,7 +27855,7 @@ def V6_vL32b_tmp_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27781,7 +27876,7 @@ def V6_vL32b_tmp_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000111;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27801,7 +27896,7 @@ def V6_vL32b_tmp_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.tmp = vmem($Rx32++#$Ii)",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -27821,7 +27916,7 @@ def V6_vL32b_tmp_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.tmp = vmem($Rx32++$Mu2)",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000010;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -27840,7 +27935,7 @@ def V6_vL32b_tmp_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27858,7 +27953,7 @@ def V6_vL32b_tmp_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27878,7 +27973,7 @@ def V6_vL32b_tmp_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000110;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27897,7 +27992,7 @@ def V6_vS32Ub_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmemu($Rt32+#$Ii) = $Vs32",
-tc_354299ad, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_f21e8abb, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -27912,7 +28007,7 @@ def V6_vS32Ub_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmemu($Rt32+#$Ii) = $Vs32",
-tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_131f1c81, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -27927,7 +28022,7 @@ def V6_vS32Ub_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmemu($Rx32++#$Ii) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -27944,7 +28039,7 @@ def V6_vS32Ub_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Pv4) vmemu($Rx32++$Mu2) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000111;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -27960,7 +28055,7 @@ def V6_vS32Ub_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmemu($Rx32++#$Ii) = $Vs32",
-tc_7fa82b08, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -27976,7 +28071,7 @@ def V6_vS32Ub_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"vmemu($Rx32++$Mu2) = $Vs32",
-tc_7fa82b08, TypeCVI_VM_STU>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
+tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-5} = 0b00000111;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -27991,7 +28086,7 @@ def V6_vS32Ub_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmemu($Rt32+#$Ii) = $Vs32",
-tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_131f1c81, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28005,7 +28100,7 @@ def V6_vS32Ub_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmemu($Rx32++#$Ii) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28021,7 +28116,7 @@ def V6_vS32Ub_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Pv4) vmemu($Rx32++$Mu2) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000110;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28036,7 +28131,7 @@ def V6_vS32b_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rt32+#$Ii) = $Vs32",
-tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28052,7 +28147,7 @@ def V6_vS32b_new_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rt32+#$Ii) = $Os8.new",
-tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
+tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28071,7 +28166,7 @@ def V6_vS32b_new_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rt32+#$Ii) = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01101;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28090,7 +28185,7 @@ def V6_vS32b_new_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++#$Ii) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28111,7 +28206,7 @@ def V6_vS32b_new_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++$Mu2) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001101;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28131,7 +28226,7 @@ def V6_vS32b_new_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rx32++#$Ii) = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28151,7 +28246,7 @@ def V6_vS32b_new_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"vmem($Rx32++$Mu2) = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-3} = 0b0000000100;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -28170,7 +28265,7 @@ def V6_vS32b_new_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rt32+#$Ii) = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01000;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28188,7 +28283,7 @@ def V6_vS32b_new_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++#$Ii) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28208,7 +28303,7 @@ def V6_vS32b_new_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++$Mu2) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001000;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28227,7 +28322,7 @@ def V6_vS32b_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28243,7 +28338,7 @@ def V6_vS32b_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28261,7 +28356,7 @@ def V6_vS32b_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28278,7 +28373,7 @@ def V6_vS32b_nqpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000100;
let addrMode = BaseImmOffset;
@@ -28290,7 +28385,7 @@ def V6_vS32b_nqpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -28304,7 +28399,7 @@ def V6_vS32b_nqpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011100;
let addrMode = PostInc;
@@ -28317,7 +28412,7 @@ def V6_vS32b_nt_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rt32+#$Ii):nt = $Vs32",
-tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000011;
@@ -28334,7 +28429,7 @@ def V6_vS32b_nt_new_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rt32+#$Ii):nt = $Os8.new",
-tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
+tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000011;
@@ -28354,7 +28449,7 @@ def V6_vS32b_nt_new_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01111;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28374,7 +28469,7 @@ def V6_vS32b_nt_new_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28396,7 +28491,7 @@ def V6_vS32b_nt_new_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001111;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28417,7 +28512,7 @@ def V6_vS32b_nt_new_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rx32++#$Ii):nt = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001011;
@@ -28438,7 +28533,7 @@ def V6_vS32b_nt_new_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"vmem($Rx32++$Mu2):nt = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-3} = 0b0000000100;
let Inst{31-21} = 0b00101011011;
let addrMode = PostInc;
@@ -28458,7 +28553,7 @@ def V6_vS32b_nt_new_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01010;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28477,7 +28572,7 @@ def V6_vS32b_nt_new_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28498,7 +28593,7 @@ def V6_vS32b_nt_new_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001010;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28518,7 +28613,7 @@ def V6_vS32b_nt_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28535,7 +28630,7 @@ def V6_vS32b_nt_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28554,7 +28649,7 @@ def V6_vS32b_nt_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28572,7 +28667,7 @@ def V6_vS32b_nt_nqpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000110;
let addrMode = BaseImmOffset;
@@ -28585,7 +28680,7 @@ def V6_vS32b_nt_nqpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -28600,7 +28695,7 @@ def V6_vS32b_nt_nqpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011110;
let addrMode = PostInc;
@@ -28614,7 +28709,7 @@ def V6_vS32b_nt_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rx32++#$Ii):nt = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001011;
@@ -28632,7 +28727,7 @@ def V6_vS32b_nt_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"vmem($Rx32++$Mu2):nt = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011011;
let addrMode = PostInc;
@@ -28649,7 +28744,7 @@ def V6_vS32b_nt_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28665,7 +28760,7 @@ def V6_vS32b_nt_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28683,7 +28778,7 @@ def V6_vS32b_nt_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28700,7 +28795,7 @@ def V6_vS32b_nt_qpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000110;
let addrMode = BaseImmOffset;
@@ -28713,7 +28808,7 @@ def V6_vS32b_nt_qpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -28728,7 +28823,7 @@ def V6_vS32b_nt_qpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011110;
let addrMode = PostInc;
@@ -28742,7 +28837,7 @@ def V6_vS32b_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rx32++#$Ii) = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28759,7 +28854,7 @@ def V6_vS32b_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"vmem($Rx32++$Mu2) = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -28774,7 +28869,7 @@ def V6_vS32b_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28789,7 +28884,7 @@ def V6_vS32b_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28806,7 +28901,7 @@ def V6_vS32b_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28822,7 +28917,7 @@ def V6_vS32b_qpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000100;
let addrMode = BaseImmOffset;
@@ -28834,7 +28929,7 @@ def V6_vS32b_qpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -28848,7 +28943,7 @@ def V6_vS32b_qpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011100;
let addrMode = PostInc;
@@ -28861,7 +28956,7 @@ def V6_vS32b_srls_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"vmem($Rt32+#$Ii):scatter_release",
-tc_29841470, TypeCVI_SCATTER_NEW_RST>, Enc_ff3442, Requires<[UseHVXV65]> {
+tc_3ce09744, TypeCVI_SCATTER_NEW_RST>, Enc_ff3442, Requires<[UseHVXV65]> {
let Inst{7-0} = 0b00101000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28875,7 +28970,7 @@ def V6_vS32b_srls_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"vmem($Rx32++#$Ii):scatter_release",
-tc_5c03dc63, TypeCVI_SCATTER_NEW_RST>, Enc_6c9ee0, Requires<[UseHVXV65]> {
+tc_20a4bbec, TypeCVI_SCATTER_NEW_RST>, Enc_6c9ee0, Requires<[UseHVXV65]> {
let Inst{7-0} = 0b00101000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28890,7 +28985,7 @@ def V6_vS32b_srls_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"vmem($Rx32++$Mu2):scatter_release",
-tc_5c03dc63, TypeCVI_SCATTER_NEW_RST>, Enc_44661f, Requires<[UseHVXV65]> {
+tc_20a4bbec, TypeCVI_SCATTER_NEW_RST>, Enc_44661f, Requires<[UseHVXV65]> {
let Inst{12-0} = 0b0000000101000;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -28904,7 +28999,7 @@ def V6_vabsb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vabs($Vu32.b)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -28927,7 +29022,7 @@ def V6_vabsb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vabs($Vu32.b):sat",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -28950,7 +29045,7 @@ def V6_vabsdiffh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vabsdiff($Vu32.h,$Vv32.h)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -28973,7 +29068,7 @@ def V6_vabsdiffub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vabsdiff($Vu32.ub,$Vv32.ub)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -28996,7 +29091,7 @@ def V6_vabsdiffuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vabsdiff($Vu32.uh,$Vv32.uh)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -29019,7 +29114,7 @@ def V6_vabsdiffw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vabsdiff($Vu32.w,$Vv32.w)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -29042,7 +29137,7 @@ def V6_vabsh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vabs($Vu32.h)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29065,7 +29160,7 @@ def V6_vabsh_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vabs($Vu32.h):sat",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29088,7 +29183,7 @@ def V6_vabsub_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.ub = vabs($Vu32.b)",
-tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -29099,7 +29194,7 @@ def V6_vabsuh_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uh = vabs($Vu32.h)",
-tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -29110,7 +29205,7 @@ def V6_vabsuw_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uw = vabs($Vu32.w)",
-tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -29121,7 +29216,7 @@ def V6_vabsw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.w = vabs($Vu32.w)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29144,7 +29239,7 @@ def V6_vabsw_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.w = vabs($Vu32.w):sat",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29167,7 +29262,7 @@ def V6_vaddb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vadd($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -29190,7 +29285,7 @@ def V6_vaddb_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29213,7 +29308,7 @@ def V6_vaddbnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.b += $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29241,7 +29336,7 @@ def V6_vaddbq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.b += $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29269,7 +29364,7 @@ def V6_vaddbsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vadd($Vu32.b,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -29292,7 +29387,7 @@ def V6_vaddbsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -29315,7 +29410,7 @@ def V6_vaddcarry : HInst<
(outs HvxVR:$Vd32, HvxQR:$Qx4),
(ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qx4in),
"$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qx4):carry",
-tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
+tc_7e6a3e89, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100101;
@@ -29324,11 +29419,37 @@ let opNewValue = 0;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_vaddcarryo : HInst<
+(outs HvxVR:$Vd32, HvxQR:$Qe4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.w,$Qe4 = vadd($Vu32.w,$Vv32.w):carry",
+tc_e35c1e93, TypeCOPROC_VX>, Enc_c1d806, Requires<[UseHVXV66]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddcarrysat : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qs4),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qs4):carry:sat",
+tc_257f6f7c, TypeCVI_VA>, Enc_e0820b, Requires<[UseHVXV66]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vaddclbh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vadd(vclb($Vu32.h),$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -29340,7 +29461,7 @@ def V6_vaddclbw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vadd(vclb($Vu32.w),$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -29352,7 +29473,7 @@ def V6_vaddh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vadd($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -29375,7 +29496,7 @@ def V6_vaddh_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29398,7 +29519,7 @@ def V6_vaddhnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.h += $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29426,7 +29547,7 @@ def V6_vaddhq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.h += $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29454,7 +29575,7 @@ def V6_vaddhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vadd($Vu32.h,$Vv32.h):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29477,7 +29598,7 @@ def V6_vaddhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -29500,7 +29621,7 @@ def V6_vaddhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vadd($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -29512,7 +29633,7 @@ def V6_vaddhw_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vadd($Vu32.h,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -29550,7 +29671,7 @@ def V6_vaddubh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vadd($Vu32.ub,$Vv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -29562,7 +29683,7 @@ def V6_vaddubh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.h += vadd($Vu32.ub,$Vv32.ub)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100010;
@@ -29600,7 +29721,7 @@ def V6_vaddubsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vadd($Vu32.ub,$Vv32.ub):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29623,7 +29744,7 @@ def V6_vaddubsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.ub = vadd($Vuu32.ub,$Vvv32.ub):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29646,7 +29767,7 @@ def V6_vaddububb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vadd($Vu32.ub,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -29658,7 +29779,7 @@ def V6_vadduhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vadd($Vu32.uh,$Vv32.uh):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29681,7 +29802,7 @@ def V6_vadduhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uh = vadd($Vuu32.uh,$Vvv32.uh):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -29704,7 +29825,7 @@ def V6_vadduhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vadd($Vu32.uh,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -29716,7 +29837,7 @@ def V6_vadduhw_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vadd($Vu32.uh,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100010;
@@ -29754,7 +29875,7 @@ def V6_vadduwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vadd($Vu32.uw,$Vv32.uw):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -29777,7 +29898,7 @@ def V6_vadduwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uw = vadd($Vuu32.uw,$Vvv32.uw):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -29800,7 +29921,7 @@ def V6_vaddw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vadd($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29823,7 +29944,7 @@ def V6_vaddw_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29846,7 +29967,7 @@ def V6_vaddwnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.w += $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29874,7 +29995,7 @@ def V6_vaddwq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.w += $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29902,7 +30023,7 @@ def V6_vaddwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vadd($Vu32.w,$Vv32.w):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29925,7 +30046,7 @@ def V6_vaddwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -29948,7 +30069,7 @@ def V6_valignb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = valign($Vu32,$Vv32,$Rt8)",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -29960,7 +30081,7 @@ def V6_valignbi : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vd32 = valign($Vu32,$Vv32,#$Ii)",
-tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110001;
let hasNewValue = 1;
@@ -29971,7 +30092,7 @@ def V6_vand : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vand($Vu32,$Vv32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -29983,7 +30104,7 @@ def V6_vandnqrt : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qu4, IntRegs:$Rt32),
"$Vd32 = vand(!$Qu4,$Rt32)",
-tc_e231aa4f, TypeCVI_VX>, Enc_7b7ba8, Requires<[UseHVXV62]> {
+tc_ac4046bc, TypeCVI_VX>, Enc_7b7ba8, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-10} = 0b0001;
let Inst{31-21} = 0b00011001101;
@@ -29995,7 +30116,7 @@ def V6_vandnqrt_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32),
"$Vx32 |= vand(!$Qu4,$Rt32)",
-tc_9311da3f, TypeCVI_VX>, Enc_895bd9, Requires<[UseHVXV62]> {
+tc_2e8f5f6e, TypeCVI_VX>, Enc_895bd9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b00011001011;
@@ -30033,7 +30154,7 @@ def V6_vandqrt : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qu4, IntRegs:$Rt32),
"$Vd32 = vand($Qu4,$Rt32)",
-tc_e231aa4f, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[UseHVXV60]> {
+tc_ac4046bc, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b00011001101;
@@ -30045,7 +30166,7 @@ def V6_vandqrt_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32),
"$Vx32 |= vand($Qu4,$Rt32)",
-tc_9311da3f, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[UseHVXV60]> {
+tc_2e8f5f6e, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b00011001011;
@@ -30083,7 +30204,7 @@ def V6_vandvnqv : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4, HvxVR:$Vu32),
"$Vd32 = vand(!$Qv4,$Vu32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000011;
@@ -30096,7 +30217,7 @@ def V6_vandvqv : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4, HvxVR:$Vu32),
"$Vd32 = vand($Qv4,$Vu32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000011;
@@ -30109,7 +30230,7 @@ def V6_vandvrt : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Qd4 = vand($Vu32,$Rt32)",
-tc_e231aa4f, TypeCVI_VX_LATE>, Enc_0f8bab, Requires<[UseHVXV60]> {
+tc_ac4046bc, TypeCVI_VX_LATE>, Enc_0f8bab, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -30121,7 +30242,7 @@ def V6_vandvrt_acc : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Qx4 |= vand($Vu32,$Rt32)",
-tc_9311da3f, TypeCVI_VX_LATE>, Enc_adf111, Requires<[UseHVXV60]> {
+tc_2e8f5f6e, TypeCVI_VX_LATE>, Enc_adf111, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -30155,7 +30276,7 @@ def V6_vaslh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vasl($Vu32.h,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -30167,7 +30288,7 @@ def V6_vaslh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vasl($Vu32.h,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -30205,7 +30326,7 @@ def V6_vaslhv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vasl($Vu32.h,$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30228,7 +30349,7 @@ def V6_vaslw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vasl($Vu32.w,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -30240,7 +30361,7 @@ def V6_vaslw_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vasl($Vu32.w,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -30278,7 +30399,7 @@ def V6_vaslwv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vasl($Vu32.w,$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30297,11 +30418,36 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vasr_into : HInst<
+(outs HvxWR:$Vxx32),
+(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vxx32.w = vasrinto($Vu32.w,$Vv32.w)",
+tc_df80eeb0, TypeCVI_VP_VS>, Enc_3fc427, Requires<[UseHVXV66]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vasr_into_alt : HInst<
+(outs HvxWR:$Vxx32),
+(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vxx32 = vasrinto($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
def V6_vasrh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vasr($Vu32.h,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -30313,7 +30459,7 @@ def V6_vasrh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vasr($Vu32.h,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -30351,7 +30497,7 @@ def V6_vasrhbrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -30363,7 +30509,7 @@ def V6_vasrhbrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30373,7 +30519,7 @@ def V6_vasrhbsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30385,7 +30531,7 @@ def V6_vasrhubrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30397,7 +30543,7 @@ def V6_vasrhubrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30407,7 +30553,7 @@ def V6_vasrhubsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30419,7 +30565,7 @@ def V6_vasrhubsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30429,7 +30575,7 @@ def V6_vasrhv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vasr($Vu32.h,$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30452,7 +30598,7 @@ def V6_vasruhubrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.uh,$Vv32.uh,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30464,7 +30610,7 @@ def V6_vasruhubsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.uh,$Vv32.uh,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011000;
@@ -30476,7 +30622,7 @@ def V6_vasruwuhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30488,7 +30634,7 @@ def V6_vasruwuhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011000;
@@ -30500,7 +30646,7 @@ def V6_vasrw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vasr($Vu32.w,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -30512,7 +30658,7 @@ def V6_vasrw_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vasr($Vu32.w,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -30550,7 +30696,7 @@ def V6_vasrwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8)",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30562,7 +30708,7 @@ def V6_vasrwh_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30572,7 +30718,7 @@ def V6_vasrwhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30584,7 +30730,7 @@ def V6_vasrwhrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30594,7 +30740,7 @@ def V6_vasrwhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30606,7 +30752,7 @@ def V6_vasrwhsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30616,7 +30762,7 @@ def V6_vasrwuhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30628,7 +30774,7 @@ def V6_vasrwuhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30640,7 +30786,7 @@ def V6_vasrwuhsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30650,7 +30796,7 @@ def V6_vasrwv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vasr($Vu32.w,$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30673,7 +30819,7 @@ def V6_vassign : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32 = $Vu32",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-16} = 0b0001111000000011;
@@ -30695,7 +30841,7 @@ def V6_vavgb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vavg($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30718,7 +30864,7 @@ def V6_vavgbrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vavg($Vu32.b,$Vv32.b):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30741,7 +30887,7 @@ def V6_vavgh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vavg($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30764,7 +30910,7 @@ def V6_vavghrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vavg($Vu32.h,$Vv32.h):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30787,7 +30933,7 @@ def V6_vavgub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30810,7 +30956,7 @@ def V6_vavgubrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30833,7 +30979,7 @@ def V6_vavguh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30856,7 +31002,7 @@ def V6_vavguhrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30879,7 +31025,7 @@ def V6_vavguw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vavg($Vu32.uw,$Vv32.uw)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30902,7 +31048,7 @@ def V6_vavguwrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vavg($Vu32.uw,$Vv32.uw):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30925,7 +31071,7 @@ def V6_vavgw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vavg($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30948,7 +31094,7 @@ def V6_vavgwrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vavg($Vu32.w,$Vv32.w):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30971,7 +31117,7 @@ def V6_vccombine : HInst<
(outs HvxWR:$Vdd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32, HvxVR:$Vv32),
"if ($Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
-tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
+tc_af25efd9, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011010011;
@@ -30984,7 +31130,7 @@ def V6_vcl0h : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uh = vcl0($Vu32.uh)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -31007,7 +31153,7 @@ def V6_vcl0w : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uw = vcl0($Vu32.uw)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -31030,7 +31176,7 @@ def V6_vcmov : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32),
"if ($Ps4) $Vd32 = $Vu32",
-tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
+tc_3aacf4a8, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001101000000000;
@@ -31043,7 +31189,7 @@ def V6_vcombine : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32 = vcombine($Vu32,$Vv32)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -31067,7 +31213,7 @@ def V6_vdd0 : HInst<
(outs HvxWR:$Vdd32),
(ins),
"$Vdd32 = #0",
-tc_8a6eb39a, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_718b5c53, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -31078,7 +31224,7 @@ def V6_vdeal : HInst<
(outs HvxVR:$Vy32, HvxVR:$Vx32),
(ins HvxVR:$Vy32in, HvxVR:$Vx32in, IntRegs:$Rt32),
"vdeal($Vy32,$Vx32,$Rt32)",
-tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
+tc_561aaa58, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001111;
@@ -31093,7 +31239,7 @@ def V6_vdealb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vdeal($Vu32.b)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -31105,7 +31251,7 @@ def V6_vdealb4w : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vdeale($Vu32.b,$Vv32.b)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -31139,7 +31285,7 @@ def V6_vdealh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vdeal($Vu32.h)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -31162,7 +31308,7 @@ def V6_vdealvdd : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32 = vdeal($Vu32,$Vv32,$Rt8)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -31174,7 +31320,7 @@ def V6_vdelta : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vdelta($Vu32,$Vv32)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -31186,7 +31332,7 @@ def V6_vdmpybus : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vdmpy($Vu32.ub,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31198,7 +31344,7 @@ def V6_vdmpybus_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vdmpy($Vu32.ub,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -31236,7 +31382,7 @@ def V6_vdmpybus_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vdmpy($Vuu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31248,7 +31394,7 @@ def V6_vdmpybus_dv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vdmpy($Vuu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -31286,7 +31432,7 @@ def V6_vdmpyhb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31298,7 +31444,7 @@ def V6_vdmpyhb_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -31336,7 +31482,7 @@ def V6_vdmpyhb_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vdmpy($Vuu32.h,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31348,7 +31494,7 @@ def V6_vdmpyhb_dv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vdmpy($Vuu32.h,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31386,7 +31532,7 @@ def V6_vdmpyhisat : HInst<
(outs HvxVR:$Vd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vuu32.h,$Rt32.h):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31398,7 +31544,7 @@ def V6_vdmpyhisat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vuu32.h,$Rt32.h):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31436,7 +31582,7 @@ def V6_vdmpyhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31448,7 +31594,7 @@ def V6_vdmpyhsat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31486,7 +31632,7 @@ def V6_vdmpyhsuisat : HInst<
(outs HvxVR:$Vd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31498,7 +31644,7 @@ def V6_vdmpyhsuisat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31536,7 +31682,7 @@ def V6_vdmpyhsusat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31548,7 +31694,7 @@ def V6_vdmpyhsusat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31586,7 +31732,7 @@ def V6_vdmpyhvsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -31598,7 +31744,7 @@ def V6_vdmpyhvsat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vdmpy($Vu32.h,$Vv32.h):sat",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -31636,7 +31782,7 @@ def V6_vdsaduh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.uw = vdsad($Vuu32.uh,$Rt32.uh)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31648,7 +31794,7 @@ def V6_vdsaduh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.uw += vdsad($Vuu32.uh,$Rt32.uh)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -31686,7 +31832,7 @@ def V6_veqb : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.eq($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31698,7 +31844,7 @@ def V6_veqb_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31709,7 +31855,7 @@ def V6_veqb_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.eq($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31721,7 +31867,7 @@ def V6_veqb_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31732,7 +31878,7 @@ def V6_veqh : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.eq($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31744,7 +31890,7 @@ def V6_veqh_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31755,7 +31901,7 @@ def V6_veqh_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.eq($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31767,7 +31913,7 @@ def V6_veqh_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31778,7 +31924,7 @@ def V6_veqw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.eq($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31790,7 +31936,7 @@ def V6_veqw_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31801,7 +31947,7 @@ def V6_veqw_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.eq($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31813,7 +31959,7 @@ def V6_veqw_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31824,7 +31970,7 @@ def V6_vgathermh : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"vtmp.h = vgather($Rt32,$Mu2,$Vv32.h).h",
-tc_66bb62ea, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
+tc_e8797b98, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
let Inst{12-5} = 0b00001000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31840,7 +31986,7 @@ def V6_vgathermhq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"if ($Qs4) vtmp.h = vgather($Rt32,$Mu2,$Vv32.h).h",
-tc_63e3d94c, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
+tc_05ac6f98, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
let Inst{12-7} = 0b001010;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31856,7 +32002,7 @@ def V6_vgathermhw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32),
"vtmp.h = vgather($Rt32,$Mu2,$Vvv32.w).h",
-tc_bfe309d5, TypeCVI_GATHER>, Enc_28dcbb, Requires<[UseHVXV65]> {
+tc_05058f6f, TypeCVI_GATHER>, Enc_28dcbb, Requires<[UseHVXV65]> {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31872,7 +32018,7 @@ def V6_vgathermhwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32),
"if ($Qs4) vtmp.h = vgather($Rt32,$Mu2,$Vvv32.w).h",
-tc_98733e9d, TypeCVI_GATHER>, Enc_4e4a80, Requires<[UseHVXV65]> {
+tc_fd7610da, TypeCVI_GATHER>, Enc_4e4a80, Requires<[UseHVXV65]> {
let Inst{12-7} = 0b001100;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31888,7 +32034,7 @@ def V6_vgathermw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"vtmp.w = vgather($Rt32,$Mu2,$Vv32.w).w",
-tc_66bb62ea, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
+tc_e8797b98, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31904,7 +32050,7 @@ def V6_vgathermwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"if ($Qs4) vtmp.w = vgather($Rt32,$Mu2,$Vv32.w).w",
-tc_63e3d94c, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
+tc_05ac6f98, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
let Inst{12-7} = 0b001000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31920,7 +32066,7 @@ def V6_vgtb : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31932,7 +32078,7 @@ def V6_vgtb_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31943,7 +32089,7 @@ def V6_vgtb_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31955,7 +32101,7 @@ def V6_vgtb_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31966,7 +32112,7 @@ def V6_vgth : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31978,7 +32124,7 @@ def V6_vgth_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31989,7 +32135,7 @@ def V6_vgth_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32001,7 +32147,7 @@ def V6_vgth_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32012,7 +32158,7 @@ def V6_vgtub : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32024,7 +32170,7 @@ def V6_vgtub_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32035,7 +32181,7 @@ def V6_vgtub_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32047,7 +32193,7 @@ def V6_vgtub_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32058,7 +32204,7 @@ def V6_vgtuh : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32070,7 +32216,7 @@ def V6_vgtuh_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32081,7 +32227,7 @@ def V6_vgtuh_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b011001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32093,7 +32239,7 @@ def V6_vgtuh_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32104,7 +32250,7 @@ def V6_vgtuw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32116,7 +32262,7 @@ def V6_vgtuw_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32127,7 +32273,7 @@ def V6_vgtuw_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b011010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32139,7 +32285,7 @@ def V6_vgtuw_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32150,7 +32296,7 @@ def V6_vgtw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32162,7 +32308,7 @@ def V6_vgtw_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32173,7 +32319,7 @@ def V6_vgtw_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32185,7 +32331,7 @@ def V6_vgtw_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32196,7 +32342,7 @@ def V6_vhist : HInst<
(outs),
(ins),
"vhist",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV60]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV60]> {
let Inst{13-0} = 0b10000010000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -32205,7 +32351,7 @@ def V6_vhistq : HInst<
(outs),
(ins HvxQR:$Qv4),
"vhist($Qv4)",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV60]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV60]> {
let Inst{13-0} = 0b10000010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -32215,7 +32361,7 @@ def V6_vinsertwr : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, IntRegs:$Rt32),
"$Vx32.w = vinsert($Rt32)",
-tc_e231aa4f, TypeCVI_VX_LATE>, Enc_569cfe, Requires<[UseHVXV60]> {
+tc_ac4046bc, TypeCVI_VX_LATE>, Enc_569cfe, Requires<[UseHVXV60]> {
let Inst{13-5} = 0b100000001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -32227,7 +32373,7 @@ def V6_vlalignb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vlalign($Vu32,$Vv32,$Rt8)",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -32239,7 +32385,7 @@ def V6_vlalignbi : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vd32 = vlalign($Vu32,$Vv32,#$Ii)",
-tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110011;
let hasNewValue = 1;
@@ -32250,7 +32396,7 @@ def V6_vlsrb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.ub = vlsr($Vu32.ub,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV62]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32262,7 +32408,7 @@ def V6_vlsrh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uh = vlsr($Vu32.uh,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32285,7 +32431,7 @@ def V6_vlsrhv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vlsr($Vu32.h,$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -32308,7 +32454,7 @@ def V6_vlsrw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uw = vlsr($Vu32.uw,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32331,7 +32477,7 @@ def V6_vlsrwv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vlsr($Vu32.w,$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -32354,7 +32500,7 @@ def V6_vlut4 : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vd32.h = vlut4($Vu32.uh,$Rtt32.h)",
-tc_fa99dc24, TypeCVI_VX_DV>, Enc_263841, Requires<[UseHVXV65]> {
+tc_f1de44ef, TypeCVI_VX_DV>, Enc_263841, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -32366,7 +32512,7 @@ def V6_vlutvvb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8)",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32378,7 +32524,7 @@ def V6_vlutvvb_nm : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8):nomatch",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -32390,7 +32536,7 @@ def V6_vlutvvb_oracc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,$Rt8)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_245865, Requires<[UseHVXV60]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_245865, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32404,7 +32550,7 @@ def V6_vlutvvb_oracci : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,#$Ii)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_cd4705, Requires<[UseHVXV62]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_cd4705, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
@@ -32417,7 +32563,7 @@ def V6_vlutvvbi : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vd32.b = vlut32($Vu32.b,$Vv32.b,#$Ii)",
-tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV62]> {
+tc_56e64202, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110001;
let hasNewValue = 1;
@@ -32428,7 +32574,7 @@ def V6_vlutvwh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32440,7 +32586,7 @@ def V6_vlutvwh_nm : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8):nomatch",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV62]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -32452,7 +32598,7 @@ def V6_vlutvwh_oracc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,$Rt8)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_7b523d, Requires<[UseHVXV60]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_7b523d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32466,7 +32612,7 @@ def V6_vlutvwh_oracci : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,#$Ii)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_1178da, Requires<[UseHVXV62]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_1178da, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
@@ -32479,7 +32625,7 @@ def V6_vlutvwhi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,#$Ii)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_4b39e4, Requires<[UseHVXV62]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_4b39e4, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110011;
let hasNewValue = 1;
@@ -32490,7 +32636,7 @@ def V6_vmaxb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vmax($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -32513,7 +32659,7 @@ def V6_vmaxh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmax($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32536,7 +32682,7 @@ def V6_vmaxub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vmax($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32559,7 +32705,7 @@ def V6_vmaxuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vmax($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32582,7 +32728,7 @@ def V6_vmaxw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmax($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -32605,7 +32751,7 @@ def V6_vminb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vmin($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -32628,7 +32774,7 @@ def V6_vminh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmin($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32651,7 +32797,7 @@ def V6_vminub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vmin($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32674,7 +32820,7 @@ def V6_vminuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vmin($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32697,7 +32843,7 @@ def V6_vminw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmin($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32720,7 +32866,7 @@ def V6_vmpabus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -32732,7 +32878,7 @@ def V6_vmpabus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -32770,7 +32916,7 @@ def V6_vmpabusv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.b)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -32793,7 +32939,7 @@ def V6_vmpabuu : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.ub)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV65]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -32805,7 +32951,7 @@ def V6_vmpabuu_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.ub)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV65]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -32843,7 +32989,7 @@ def V6_vmpabuuv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -32866,7 +33012,7 @@ def V6_vmpahb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vmpa($Vuu32.h,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -32878,7 +33024,7 @@ def V6_vmpahb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vmpa($Vuu32.h,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -32916,7 +33062,7 @@ def V6_vmpahhsat : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vx32.h = vmpa($Vx32in.h,$Vu32.h,$Rtt32.h):sat",
-tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
+tc_90bcc1db, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -32929,7 +33075,7 @@ def V6_vmpauhb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vmpa($Vuu32.uh,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV62]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32941,7 +33087,7 @@ def V6_vmpauhb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vmpa($Vuu32.uh,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV62]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -32979,7 +33125,7 @@ def V6_vmpauhuhsat : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vx32.h = vmpa($Vx32in.h,$Vu32.uh,$Rtt32.uh):sat",
-tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
+tc_90bcc1db, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -32992,7 +33138,7 @@ def V6_vmpsuhuhsat : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vx32.h = vmps($Vx32in.h,$Vu32.uh,$Rtt32.uh):sat",
-tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
+tc_90bcc1db, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -33005,7 +33151,7 @@ def V6_vmpybus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.h = vmpy($Vu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -33017,7 +33163,7 @@ def V6_vmpybus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.h += vmpy($Vu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -33055,7 +33201,7 @@ def V6_vmpybusv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vmpy($Vu32.ub,$Vv32.b)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33067,7 +33213,7 @@ def V6_vmpybusv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.h += vmpy($Vu32.ub,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -33105,7 +33251,7 @@ def V6_vmpybv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vmpy($Vu32.b,$Vv32.b)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33117,7 +33263,7 @@ def V6_vmpybv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.h += vmpy($Vu32.b,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -33155,7 +33301,7 @@ def V6_vmpyewuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpye($Vu32.w,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -33167,7 +33313,7 @@ def V6_vmpyewuh_64 : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32 = vmpye($Vu32.w,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV62]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -33190,7 +33336,7 @@ def V6_vmpyh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.w = vmpy($Vu32.h,$Rt32.h)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -33202,7 +33348,7 @@ def V6_vmpyh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.w += vmpy($Vu32.h,$Rt32.h)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV65]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -33240,7 +33386,7 @@ def V6_vmpyhsat_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.w += vmpy($Vu32.h,$Rt32.h):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -33267,7 +33413,7 @@ def V6_vmpyhsrs : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -33290,7 +33436,7 @@ def V6_vmpyhss : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -33313,7 +33459,7 @@ def V6_vmpyhus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vmpy($Vu32.h,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -33325,7 +33471,7 @@ def V6_vmpyhus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vmpy($Vu32.h,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33363,7 +33509,7 @@ def V6_vmpyhv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vmpy($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33375,7 +33521,7 @@ def V6_vmpyhv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vmpy($Vu32.h,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -33413,7 +33559,7 @@ def V6_vmpyhvsrs : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -33436,7 +33582,7 @@ def V6_vmpyieoh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyieo($Vu32.h,$Vv32.h)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -33448,7 +33594,7 @@ def V6_vmpyiewh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyie($Vu32.w,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100010;
@@ -33475,7 +33621,7 @@ def V6_vmpyiewuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyie($Vu32.w,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -33487,7 +33633,7 @@ def V6_vmpyiewuh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyie($Vu32.w,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33525,7 +33671,7 @@ def V6_vmpyih : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmpyi($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -33537,7 +33683,7 @@ def V6_vmpyih_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.h += vmpyi($Vu32.h,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33575,7 +33721,7 @@ def V6_vmpyihb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpyi($Vu32.h,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -33587,7 +33733,7 @@ def V6_vmpyihb_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vmpyi($Vu32.h,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -33625,7 +33771,7 @@ def V6_vmpyiowh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyio($Vu32.w,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -33648,7 +33794,7 @@ def V6_vmpyiwb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vmpyi($Vu32.w,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -33660,7 +33806,7 @@ def V6_vmpyiwb_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vmpyi($Vu32.w,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -33698,7 +33844,7 @@ def V6_vmpyiwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vmpyi($Vu32.w,$Rt32.h)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -33710,7 +33856,7 @@ def V6_vmpyiwh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vmpyi($Vu32.w,$Rt32.h)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -33748,7 +33894,7 @@ def V6_vmpyiwub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vmpyi($Vu32.w,$Rt32.ub)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV62]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -33760,7 +33906,7 @@ def V6_vmpyiwub_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vmpyi($Vu32.w,$Rt32.ub)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV62]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -33798,7 +33944,7 @@ def V6_vmpyowh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -33810,7 +33956,7 @@ def V6_vmpyowh_64_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32 += vmpyo($Vu32.w,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33835,7 +33981,7 @@ def V6_vmpyowh_rnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -33858,7 +34004,7 @@ def V6_vmpyowh_rnd_sacc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat:shift",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33884,7 +34030,7 @@ def V6_vmpyowh_sacc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:sat:shift",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33910,7 +34056,7 @@ def V6_vmpyub : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.uh = vmpy($Vu32.ub,$Rt32.ub)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
@@ -33922,7 +34068,7 @@ def V6_vmpyub_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.uh += vmpy($Vu32.ub,$Rt32.ub)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -33960,7 +34106,7 @@ def V6_vmpyubv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.uh = vmpy($Vu32.ub,$Vv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33972,7 +34118,7 @@ def V6_vmpyubv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.uh += vmpy($Vu32.ub,$Vv32.ub)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -34010,7 +34156,7 @@ def V6_vmpyuh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.uw = vmpy($Vu32.uh,$Rt32.uh)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -34022,7 +34168,7 @@ def V6_vmpyuh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.uw += vmpy($Vu32.uh,$Rt32.uh)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -34060,7 +34206,7 @@ def V6_vmpyuhe : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uw = vmpye($Vu32.uh,$Rt32.uh)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV65]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -34072,7 +34218,7 @@ def V6_vmpyuhe_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.uw += vmpye($Vu32.uh,$Rt32.uh)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV65]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -34086,7 +34232,7 @@ def V6_vmpyuhv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.uw = vmpy($Vu32.uh,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -34098,7 +34244,7 @@ def V6_vmpyuhv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.uw += vmpy($Vu32.uh,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -34136,7 +34282,7 @@ def V6_vmux : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vmux($Qt4,$Vu32,$Vv32)",
-tc_a3127e12, TypeCVI_VA>, Enc_31db33, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_31db33, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110111;
@@ -34148,7 +34294,7 @@ def V6_vnavgb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vnavg($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -34171,7 +34317,7 @@ def V6_vnavgh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vnavg($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -34194,7 +34340,7 @@ def V6_vnavgub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vnavg($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -34217,7 +34363,7 @@ def V6_vnavgw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vnavg($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -34240,7 +34386,7 @@ def V6_vnccombine : HInst<
(outs HvxWR:$Vdd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32, HvxVR:$Vv32),
"if (!$Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
-tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
+tc_af25efd9, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011010010;
@@ -34254,7 +34400,7 @@ def V6_vncmov : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32),
"if (!$Ps4) $Vd32 = $Vu32",
-tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
+tc_3aacf4a8, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001101000100000;
@@ -34268,7 +34414,7 @@ def V6_vnormamth : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vnormamt($Vu32.h)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000011;
@@ -34291,7 +34437,7 @@ def V6_vnormamtw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.w = vnormamt($Vu32.w)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000011;
@@ -34314,7 +34460,7 @@ def V6_vnot : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32 = vnot($Vu32)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -34326,7 +34472,7 @@ def V6_vor : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vor($Vu32,$Vv32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -34338,7 +34484,7 @@ def V6_vpackeb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vpacke($Vu32.h,$Vv32.h)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34361,7 +34507,7 @@ def V6_vpackeh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vpacke($Vu32.w,$Vv32.w)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34384,7 +34530,7 @@ def V6_vpackhb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vpack($Vu32.h,$Vv32.h):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34407,7 +34553,7 @@ def V6_vpackhub_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vpack($Vu32.h,$Vv32.h):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34430,7 +34576,7 @@ def V6_vpackob : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vpacko($Vu32.h,$Vv32.h)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -34453,7 +34599,7 @@ def V6_vpackoh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vpacko($Vu32.w,$Vv32.w)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -34476,7 +34622,7 @@ def V6_vpackwh_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vpack($Vu32.w,$Vv32.w):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -34499,7 +34645,7 @@ def V6_vpackwuh_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vpack($Vu32.w,$Vv32.w):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34522,7 +34668,7 @@ def V6_vpopcounth : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vpopcount($Vu32.h)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -34545,7 +34691,7 @@ def V6_vprefixqb : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4),
"$Vd32.b = prefixsum($Qv4)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
let Inst{13-5} = 0b100000010;
let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
@@ -34557,7 +34703,7 @@ def V6_vprefixqh : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4),
"$Vd32.h = prefixsum($Qv4)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
let Inst{13-5} = 0b100001010;
let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
@@ -34569,7 +34715,7 @@ def V6_vprefixqw : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4),
"$Vd32.w = prefixsum($Qv4)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
let Inst{13-5} = 0b100010010;
let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
@@ -34581,7 +34727,7 @@ def V6_vrdelta : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vrdelta($Vu32,$Vv32)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -34593,7 +34739,7 @@ def V6_vrmpybub_rtt : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vdd32.w = vrmpy($Vu32.b,$Rtt32.ub)",
-tc_a807365d, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
+tc_cd94bfe0, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
@@ -34605,7 +34751,7 @@ def V6_vrmpybub_rtt_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vxx32.w += vrmpy($Vu32.b,$Rtt32.ub)",
-tc_ee927c0e, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
+tc_15fdf750, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -34643,7 +34789,7 @@ def V6_vrmpybus : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vrmpy($Vu32.ub,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -34655,7 +34801,7 @@ def V6_vrmpybus_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vrmpy($Vu32.ub,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -34693,7 +34839,7 @@ def V6_vrmpybusi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vdd32.w = vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
-tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
+tc_1ad8a370, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -34705,7 +34851,7 @@ def V6_vrmpybusi_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vxx32.w += vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
-tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
+tc_e675c45a, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -34743,7 +34889,7 @@ def V6_vrmpybusv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vrmpy($Vu32.ub,$Vv32.b)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -34755,7 +34901,7 @@ def V6_vrmpybusv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -34793,7 +34939,7 @@ def V6_vrmpybv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vrmpy($Vu32.b,$Vv32.b)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -34805,7 +34951,7 @@ def V6_vrmpybv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vrmpy($Vu32.b,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -34843,7 +34989,7 @@ def V6_vrmpyub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uw = vrmpy($Vu32.ub,$Rt32.ub)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -34855,7 +35001,7 @@ def V6_vrmpyub_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.uw += vrmpy($Vu32.ub,$Rt32.ub)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -34893,7 +35039,7 @@ def V6_vrmpyub_rtt : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vdd32.uw = vrmpy($Vu32.ub,$Rtt32.ub)",
-tc_a807365d, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
+tc_cd94bfe0, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
@@ -34905,7 +35051,7 @@ def V6_vrmpyub_rtt_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vxx32.uw += vrmpy($Vu32.ub,$Rtt32.ub)",
-tc_ee927c0e, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
+tc_15fdf750, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -34943,7 +35089,7 @@ def V6_vrmpyubi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vdd32.uw = vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
+tc_1ad8a370, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -34955,7 +35101,7 @@ def V6_vrmpyubi_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vxx32.uw += vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
+tc_e675c45a, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -34993,7 +35139,7 @@ def V6_vrmpyubv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vrmpy($Vu32.ub,$Vv32.ub)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -35005,7 +35151,7 @@ def V6_vrmpyubv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -35039,11 +35185,276 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vrmpyzbb_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzbb_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzbb_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzbb_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyzbub_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rt8.ub)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzbub_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rt8.ub)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzbub_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rx8.ub++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzbub_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rx8.ub++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyzcb_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vr16mpyz($Vu32.c,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzcb_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vr16mpyz($Vu32.c,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzcb_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vr16mpyz($Vu32.c,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzcb_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vr16mpyz($Vu32.c,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyzcbs_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vr16mpyzs($Vu32.c,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzcbs_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vr16mpyzs($Vu32.c,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzcbs_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vr16mpyzs($Vu32.c,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzcbs_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vr16mpyzs($Vu32.c,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyznb_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vr8mpyz($Vu32.n,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyznb_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vr8mpyz($Vu32.n,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyznb_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vr8mpyz($Vu32.n,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyznb_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vr8mpyz($Vu32.n,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
def V6_vror : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32 = vror($Vu32,$Rt32)",
-tc_bf142ae2, TypeCVI_VP>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_6e7fa133, TypeCVI_VP>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -35051,11 +35462,34 @@ let hasNewValue = 1;
let opNewValue = 0;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vrotr : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.uw = vrotr($Vu32.uw,$Vv32.uw)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV66]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrotr_alt : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32 = vrotr($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vroundhb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vround($Vu32.h,$Vv32.h):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35078,7 +35512,7 @@ def V6_vroundhub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vround($Vu32.h,$Vv32.h):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35101,7 +35535,7 @@ def V6_vrounduhub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vround($Vu32.uh,$Vv32.uh):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -35124,7 +35558,7 @@ def V6_vrounduwuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vround($Vu32.uw,$Vv32.uw):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -35147,7 +35581,7 @@ def V6_vroundwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vround($Vu32.w,$Vv32.w):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35170,7 +35604,7 @@ def V6_vroundwuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vround($Vu32.w,$Vv32.w):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35193,7 +35627,7 @@ def V6_vrsadubi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vdd32.uw = vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
+tc_1ad8a370, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -35205,7 +35639,7 @@ def V6_vrsadubi_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vxx32.uw += vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
+tc_e675c45a, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -35239,11 +35673,23 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vsatdw : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.w = vsatdw($Vu32.w,$Vv32.w)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV66]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsathub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vsat($Vu32.h,$Vv32.h)",
-tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_8772086c, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35266,7 +35712,7 @@ def V6_vsatuwuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vsat($Vu32.uw,$Vv32.uw)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -35289,7 +35735,7 @@ def V6_vsatwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vsat($Vu32.w,$Vv32.w)",
-tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_8772086c, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35312,7 +35758,7 @@ def V6_vsb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.h = vsxt($Vu32.b)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -35335,7 +35781,7 @@ def V6_vscattermh : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.h).h = $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35346,7 +35792,7 @@ def V6_vscattermh_add : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.h).h += $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35377,7 +35823,7 @@ def V6_vscattermhq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.h).h = $Vw32",
-tc_df54ad52, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
+tc_8e420e4d, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b00101111100;
let accessSize = HalfWordAccess;
@@ -35397,7 +35843,7 @@ def V6_vscattermhw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vvv32.w).h = $Vw32",
-tc_ec58f88a, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
+tc_7273323b, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35408,7 +35854,7 @@ def V6_vscattermhw_add : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vvv32.w).h += $Vw32",
-tc_ec58f88a, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
+tc_7273323b, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35420,7 +35866,7 @@ def V6_vscattermhwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vvv32.w).h = $Vw32",
-tc_94f43c04, TypeCVI_SCATTER_DV>, Enc_3d6d37, Requires<[UseHVXV65]> {
+tc_58d21193, TypeCVI_SCATTER_DV>, Enc_3d6d37, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b00101111101;
let accessSize = HalfWordAccess;
@@ -35431,7 +35877,7 @@ def V6_vscattermw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.w).w = $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101111001;
let accessSize = WordAccess;
@@ -35442,7 +35888,7 @@ def V6_vscattermw_add : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.w).w += $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b00101111001;
let accessSize = WordAccess;
@@ -35501,7 +35947,7 @@ def V6_vscattermwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.w).w = $Vw32",
-tc_df54ad52, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
+tc_8e420e4d, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b00101111100;
let accessSize = WordAccess;
@@ -35521,7 +35967,7 @@ def V6_vsh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.w = vsxt($Vu32.h)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -35544,7 +35990,7 @@ def V6_vshufeh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vshuffe($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35567,7 +36013,7 @@ def V6_vshuff : HInst<
(outs HvxVR:$Vy32, HvxVR:$Vx32),
(ins HvxVR:$Vy32in, HvxVR:$Vx32in, IntRegs:$Rt32),
"vshuff($Vy32,$Vx32,$Rt32)",
-tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
+tc_561aaa58, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001111;
@@ -35582,7 +36028,7 @@ def V6_vshuffb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vshuff($Vu32.b)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -35605,7 +36051,7 @@ def V6_vshuffeb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vshuffe($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35628,7 +36074,7 @@ def V6_vshuffh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vshuff($Vu32.h)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -35651,7 +36097,7 @@ def V6_vshuffob : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vshuffo($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35674,7 +36120,7 @@ def V6_vshuffvdd : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32 = vshuff($Vu32,$Vv32,$Rt8)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -35686,7 +36132,7 @@ def V6_vshufoeb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.b = vshuffoe($Vu32.b,$Vv32.b)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35709,7 +36155,7 @@ def V6_vshufoeh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vshuffoe($Vu32.h,$Vv32.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35732,7 +36178,7 @@ def V6_vshufoh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vshuffo($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35755,7 +36201,7 @@ def V6_vsubb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vsub($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -35778,7 +36224,7 @@ def V6_vsubb_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -35801,7 +36247,7 @@ def V6_vsubbnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.b -= $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -35827,7 +36273,7 @@ def V6_vsubbq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.b -= $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -35853,7 +36299,7 @@ def V6_vsubbsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vsub($Vu32.b,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -35876,7 +36322,7 @@ def V6_vsubbsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -35899,7 +36345,7 @@ def V6_vsubcarry : HInst<
(outs HvxVR:$Vd32, HvxQR:$Qx4),
(ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qx4in),
"$Vd32.w = vsub($Vu32.w,$Vv32.w,$Qx4):carry",
-tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
+tc_7e6a3e89, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100101;
@@ -35908,11 +36354,25 @@ let opNewValue = 0;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_vsubcarryo : HInst<
+(outs HvxVR:$Vd32, HvxQR:$Qe4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.w,$Qe4 = vsub($Vu32.w,$Vv32.w):carry",
+tc_e35c1e93, TypeCOPROC_VX>, Enc_c1d806, Requires<[UseHVXV66]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsubh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vsub($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -35935,7 +36395,7 @@ def V6_vsubh_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -35958,7 +36418,7 @@ def V6_vsubhnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.h -= $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -35984,7 +36444,7 @@ def V6_vsubhq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.h -= $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -36010,7 +36470,7 @@ def V6_vsubhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vsub($Vu32.h,$Vv32.h):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36033,7 +36493,7 @@ def V6_vsubhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36056,7 +36516,7 @@ def V6_vsubhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vsub($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36079,7 +36539,7 @@ def V6_vsububh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vsub($Vu32.ub,$Vv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36102,7 +36562,7 @@ def V6_vsububsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vsub($Vu32.ub,$Vv32.ub):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36125,7 +36585,7 @@ def V6_vsububsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.ub = vsub($Vuu32.ub,$Vvv32.ub):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -36148,7 +36608,7 @@ def V6_vsubububb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vsub($Vu32.ub,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -36160,7 +36620,7 @@ def V6_vsubuhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vsub($Vu32.uh,$Vv32.uh):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36183,7 +36643,7 @@ def V6_vsubuhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uh = vsub($Vuu32.uh,$Vvv32.uh):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -36206,7 +36666,7 @@ def V6_vsubuhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vsub($Vu32.uh,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36229,7 +36689,7 @@ def V6_vsubuwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vsub($Vu32.uw,$Vv32.uw):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -36252,7 +36712,7 @@ def V6_vsubuwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uw = vsub($Vuu32.uw,$Vvv32.uw):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -36275,7 +36735,7 @@ def V6_vsubw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vsub($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -36298,7 +36758,7 @@ def V6_vsubw_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -36321,7 +36781,7 @@ def V6_vsubwnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.w -= $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -36347,7 +36807,7 @@ def V6_vsubwq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.w -= $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -36373,7 +36833,7 @@ def V6_vsubwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vsub($Vu32.w,$Vv32.w):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36396,7 +36856,7 @@ def V6_vsubwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36419,7 +36879,7 @@ def V6_vswap : HInst<
(outs HvxWR:$Vdd32),
(ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32 = vswap($Qt4,$Vu32,$Vv32)",
-tc_316c637c, TypeCVI_VA_DV>, Enc_3dac0b, Requires<[UseHVXV60]> {
+tc_71646d06, TypeCVI_VA_DV>, Enc_3dac0b, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110101;
@@ -36431,7 +36891,7 @@ def V6_vtmpyb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vtmpy($Vuu32.b,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -36443,7 +36903,7 @@ def V6_vtmpyb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vtmpy($Vuu32.b,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -36481,7 +36941,7 @@ def V6_vtmpybus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vtmpy($Vuu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -36493,7 +36953,7 @@ def V6_vtmpybus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vtmpy($Vuu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -36531,7 +36991,7 @@ def V6_vtmpyhb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vtmpy($Vuu32.h,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -36543,7 +37003,7 @@ def V6_vtmpyhb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vtmpy($Vuu32.h,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -36595,7 +37055,7 @@ def V6_vunpackb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.h = vunpack($Vu32.b)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36618,7 +37078,7 @@ def V6_vunpackh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.w = vunpack($Vu32.h)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36641,7 +37101,7 @@ def V6_vunpackob : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32),
"$Vxx32.h |= vunpacko($Vu32.b)",
-tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
+tc_2c745bb8, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-16} = 0b0001111000000000;
@@ -36667,7 +37127,7 @@ def V6_vunpackoh : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32),
"$Vxx32.w |= vunpacko($Vu32.h)",
-tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
+tc_2c745bb8, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-16} = 0b0001111000000000;
@@ -36694,7 +37154,7 @@ def V6_vunpackub : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uh = vunpack($Vu32.ub)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36717,7 +37177,7 @@ def V6_vunpackuh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uw = vunpack($Vu32.uh)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36740,7 +37200,7 @@ def V6_vwhist128 : HInst<
(outs),
(ins),
"vwhist128",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10010010000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -36749,7 +37209,7 @@ def V6_vwhist128m : HInst<
(outs),
(ins u1_0Imm:$Ii),
"vwhist128(#$Ii)",
-tc_b77635b4, TypeCVI_HIST>, Enc_efaed8, Requires<[UseHVXV62]> {
+tc_b28e51aa, TypeCVI_HIST>, Enc_efaed8, Requires<[UseHVXV62]> {
let Inst{7-0} = 0b10000000;
let Inst{13-9} = 0b10011;
let Inst{31-16} = 0b0001111000000000;
@@ -36759,7 +37219,7 @@ def V6_vwhist128q : HInst<
(outs),
(ins HvxQR:$Qv4),
"vwhist128($Qv4)",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10010010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -36769,7 +37229,7 @@ def V6_vwhist128qm : HInst<
(outs),
(ins HvxQR:$Qv4, u1_0Imm:$Ii),
"vwhist128($Qv4,#$Ii)",
-tc_28978789, TypeCVI_HIST>, Enc_802dc0, Requires<[UseHVXV62]> {
+tc_767c4e9d, TypeCVI_HIST>, Enc_802dc0, Requires<[UseHVXV62]> {
let Inst{7-0} = 0b10000000;
let Inst{13-9} = 0b10011;
let Inst{21-16} = 0b000010;
@@ -36780,7 +37240,7 @@ def V6_vwhist256 : HInst<
(outs),
(ins),
"vwhist256",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001010000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -36789,7 +37249,7 @@ def V6_vwhist256_sat : HInst<
(outs),
(ins),
"vwhist256:sat",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001110000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -36798,7 +37258,7 @@ def V6_vwhist256q : HInst<
(outs),
(ins HvxQR:$Qv4),
"vwhist256($Qv4)",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -36808,7 +37268,7 @@ def V6_vwhist256q_sat : HInst<
(outs),
(ins HvxQR:$Qv4),
"vwhist256($Qv4):sat",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001110000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -36818,7 +37278,7 @@ def V6_vxor : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vxor($Vu32,$Vv32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -36830,7 +37290,7 @@ def V6_vzb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uh = vzxt($Vu32.ub)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -36853,7 +37313,7 @@ def V6_vzh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uw = vzxt($Vu32.uh)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -36872,11 +37332,122 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_zLd_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"z = vmem($Rt32+#$Ii)",
+tc_e699ae41, TypeCVI_ZW>, Enc_ff3442, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101100000;
+let addrMode = BaseImmOffset;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zLd_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"z = vmem($Rx32++#$Ii)",
+tc_a0dbea28, TypeCVI_ZW>, Enc_6c9ee0, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101101000;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zLd_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"z = vmem($Rx32++$Mu2)",
+tc_a0dbea28, TypeCVI_ZW>, Enc_44661f, Requires<[UseHVXV66,UseZReg]> {
+let Inst{12-0} = 0b0000000000001;
+let Inst{31-21} = 0b00101101000;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zLd_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) z = vmem($Rt32+#$Ii)",
+tc_dd5b0695, TypeCVI_ZW>, Enc_ef601b, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b00101100100;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zLd_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) z = vmem($Rx32++#$Ii)",
+tc_3ad719fb, TypeCVI_ZW>, Enc_6baed4, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101101100;
+let isPredicated = 1;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zLd_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) z = vmem($Rx32++$Mu2)",
+tc_3ad719fb, TypeCVI_ZW>, Enc_691712, Requires<[UseHVXV66,UseZReg]> {
+let Inst{10-0} = 0b00000000001;
+let Inst{31-21} = 0b00101101100;
+let isPredicated = 1;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zextract : HInst<
+(outs HvxVR:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = zextract($Rt32)",
+tc_5bf8afbb, TypeCVI_VP>, Enc_a5ed8a, Requires<[UseHVXV66,UseZReg]> {
+let Inst{13-5} = 0b000001001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zld0 : HInst<
+(outs),
+(ins IntRegs:$Rt32),
+"z = vmem($Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zldp0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32),
+"if ($Pv4) z = vmem($Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def Y2_barrier : HInst<
(outs),
(ins),
"barrier",
-tc_367f7f3d, TypeST>, Enc_e3b0c4 {
+tc_8c99de45, TypeST>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b1010100000000000;
let isSoloAX = 1;
@@ -36886,7 +37457,7 @@ def Y2_break : HInst<
(outs),
(ins),
"brkpt",
-tc_4ca572d4, TypeCR>, Enc_e3b0c4 {
+tc_9ad9998f, TypeCR>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b0110110000100000;
let isSolo = 1;
@@ -36895,7 +37466,7 @@ def Y2_dccleana : HInst<
(outs),
(ins IntRegs:$Rs32),
"dccleana($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000000;
let isRestrictSlot1AOK = 1;
@@ -36905,7 +37476,7 @@ def Y2_dccleaninva : HInst<
(outs),
(ins IntRegs:$Rs32),
"dccleaninva($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000010;
let isRestrictSlot1AOK = 1;
@@ -36915,7 +37486,7 @@ def Y2_dcfetch : HInst<
(outs),
(ins IntRegs:$Rs32),
"dcfetch($Rs32)",
-tc_3da80ba5, TypeMAPPING> {
+tc_d63f638c, TypeMAPPING> {
let hasSideEffects = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -36924,7 +37495,7 @@ def Y2_dcfetchbo : HInst<
(outs),
(ins IntRegs:$Rs32, u11_3Imm:$Ii),
"dcfetch($Rs32+#$Ii)",
-tc_4d9914c9, TypeLD>, Enc_2d829e {
+tc_9ca930f7, TypeLD>, Enc_2d829e {
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10010100000;
let addrMode = BaseImmOffset;
@@ -36935,7 +37506,7 @@ def Y2_dcinva : HInst<
(outs),
(ins IntRegs:$Rs32),
"dcinva($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000001;
let isRestrictSlot1AOK = 1;
@@ -36945,7 +37516,7 @@ def Y2_dczeroa : HInst<
(outs),
(ins IntRegs:$Rs32),
"dczeroa($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000110;
let isRestrictSlot1AOK = 1;
@@ -36956,7 +37527,7 @@ def Y2_icinva : HInst<
(outs),
(ins IntRegs:$Rs32),
"icinva($Rs32)",
-tc_999d32db, TypeJ>, Enc_ecbcc8 {
+tc_5d7f5414, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010110110;
let isSolo = 1;
@@ -36965,7 +37536,7 @@ def Y2_isync : HInst<
(outs),
(ins),
"isync",
-tc_b13761ae, TypeJ>, Enc_e3b0c4 {
+tc_8b121f4a, TypeJ>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000010;
let Inst{31-16} = 0b0101011111000000;
let isSolo = 1;
@@ -36974,16 +37545,25 @@ def Y2_syncht : HInst<
(outs),
(ins),
"syncht",
-tc_367f7f3d, TypeST>, Enc_e3b0c4 {
+tc_8c99de45, TypeST>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b1010100001000000;
let isSolo = 1;
}
+def Y2_wait : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"wait($Rs32)",
+tc_174516e8, TypeCR>, Enc_ecbcc8, Requires<[HasV65]> {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01100100010;
+let isSolo = 1;
+}
def Y4_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"l2fetch($Rs32,$Rt32)",
-tc_daa058fa, TypeST>, Enc_ca3887 {
+tc_fe211424, TypeST>, Enc_ca3887 {
let Inst{7-0} = 0b00000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100110000;
@@ -36995,7 +37575,7 @@ def Y4_trace : HInst<
(outs),
(ins IntRegs:$Rs32),
"trace($Rs32)",
-tc_c82dc1ff, TypeCR>, Enc_ecbcc8 {
+tc_6b25e783, TypeCR>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01100010010;
let isSoloAX = 1;
@@ -37004,7 +37584,7 @@ def Y5_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"l2fetch($Rs32,$Rtt32)",
-tc_daa058fa, TypeST>, Enc_e6abcf, Requires<[HasV5]> {
+tc_fe211424, TypeST>, Enc_e6abcf {
let Inst{7-0} = 0b00000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100110100;
@@ -37016,7 +37596,7 @@ def dep_A2_addsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32):sat:deprecated",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101100;
@@ -37029,7 +37609,7 @@ def dep_A2_subsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32):sat:deprecated",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101100;
@@ -37042,7 +37622,7 @@ def dep_S2_packhl : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = packhl($Rs32,$Rt32):deprecated",
-tc_540fdfbc, TypeALU64>, Enc_be32a5 {
+tc_946df596, TypeALU64>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010100000;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
new file mode 100644
index 000000000000..2346fa572626
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -0,0 +1,3337 @@
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, please consult code owner before editing.
+//===----------------------------------------------------------------------===//
+
+
+// V5 Scalar Instructions.
+
+def: Pat<(int_hexagon_S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwh DoubleRegs:$src1),
+ (S2_vsatwh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysu_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpysu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpysc_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpysc_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpysc_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpysc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyi_whc DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyi_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffoh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffoh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmax IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmax IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabswsat DoubleRegs:$src1),
+ (A2_vabswsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_combineri IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_notp DoubleRegs:$src1),
+ (A2_notp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2s_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2s_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2s_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2s_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_brevp DoubleRegs:$src1),
+ (S2_brevp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_pmpyw_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_pmpyw_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl1 IntRegs:$src1),
+ (S2_cl1 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplte IntRegs:$src1, IntRegs:$src2),
+ (C4_cmplte IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxup DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_maxup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_interleave DoubleRegs:$src1),
+ (S2_interleave DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_abssat IntRegs:$src1),
+ (A2_abssat IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpwgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpwgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtu IntRegs:$src1, IntRegs:$src2),
+ (C2_cmpgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2),
+ (A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2),
+ (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyi IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyi IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2uw_chop DoubleRegs:$src1),
+ (F2_conv_df2uw_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpheq IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpheq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
+ (S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ntstbit_r IntRegs:$src1, IntRegs:$src2),
+ (S4_ntstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_w2sf IntRegs:$src1),
+ (F2_conv_w2sf IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_not PredRegs:$src1),
+ (C2_not PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_tfrpr PredRegs:$src1),
+ (C2_tfrpr PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgt IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpbgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_orp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_orp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgtu IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpbgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2),
+ (A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addsp IntRegs:$src1, DoubleRegs:$src2),
+ (A2_addsp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_pmpyw IntRegs:$src1, IntRegs:$src2),
+ (M4_pmpyw IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathb DoubleRegs:$src1),
+ (S2_vsathb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_absp DoubleRegs:$src1),
+ (A2_absp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_all8 PredRegs:$src1),
+ (C2_all8 PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffma_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyoh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsset IntRegs:$src1, IntRegs:$src2),
+ (C2_bitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2),
+ (M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2),
+ (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_boundscheck IntRegs:$src1, DoubleRegs:$src2),
+ (A4_boundscheck IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_fastcorner9 PredRegs:$src1, PredRegs:$src2),
+ (C4_fastcorner9 PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_s1rp DoubleRegs:$src1, IntRegs:$src2),
+ (M2_vrcmpys_s1rp DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subsat IntRegs:$src1, IntRegs:$src2),
+ (A2_subsat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_ud2df DoubleRegs:$src1),
+ (F2_conv_ud2df DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vzxthw IntRegs:$src1),
+ (S2_vzxthw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfadd IntRegs:$src1, IntRegs:$src2),
+ (F2_sfadd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sub IntRegs:$src1, IntRegs:$src2),
+ (A2_sub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2su_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2su_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4),
+ (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_packhl IntRegs:$src1, IntRegs:$src2),
+ (S2_packhl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_d2df DoubleRegs:$src1),
+ (F2_conv_d2df DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2),
+ (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vconj DoubleRegs:$src1),
+ (A2_vconj DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clbp DoubleRegs:$src1),
+ (S2_clbp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_deinterleave DoubleRegs:$src1),
+ (S2_deinterleave DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_any8 PredRegs:$src1),
+ (C2_any8 PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_togglebit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_togglebit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_uw2sf IntRegs:$src1),
+ (F2_conv_uw2sf IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathb_nopack DoubleRegs:$src1),
+ (S2_vsathb_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clrbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_clrbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
+ (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpbeq DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpbeq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmphgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmphgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred:$src1),
+ (A2_tfrsi s32_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svnavgh IntRegs:$src1, IntRegs:$src2),
+ (A2_svnavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
+ (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svavgh IntRegs:$src1, IntRegs:$src2),
+ (A2_svavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_negsat IntRegs:$src1),
+ (A2_negsat IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_bitsplit IntRegs:$src1, IntRegs:$src2),
+ (A4_bitsplit IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabshsat DoubleRegs:$src1),
+ (A2_vabshsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyui IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyui IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyr_s0c DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyr_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupn IntRegs:$src1, IntRegs:$src2),
+ (F2_sffixupn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vadduhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vadduhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubuhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubuhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_xorp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_xorp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_tfrpcp DoubleRegs:$src1),
+ (A4_tfrpcp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_zxtb IntRegs:$src1),
+ (A2_zxtb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_zxth IntRegs:$src1),
+ (A2_zxth IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmacbsu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M5_vmacbsu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sat DoubleRegs:$src1),
+ (A2_sat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addsat IntRegs:$src1, IntRegs:$src2),
+ (A2_addsat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svavghs IntRegs:$src1, IntRegs:$src2),
+ (A2_svavghs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2),
+ (C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vradduh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vradduh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_addp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (A4_addp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_xor PredRegs:$src1, PredRegs:$src2),
+ (C2_xor PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2ud_chop DoubleRegs:$src1),
+ (F2_conv_df2ud_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxaddsubhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxaddsubhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathub DoubleRegs:$src1),
+ (S2_vsathub DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2sf DoubleRegs:$src1),
+ (F2_conv_df2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyh_rs1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyh_rs1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxth IntRegs:$src1),
+ (A2_sxth IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxtb IntRegs:$src1),
+ (A2_sxtb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmaci_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmaci_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxtw IntRegs:$src1),
+ (A2_sxtw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vabsdiffh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vabsdiffh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl1p DoubleRegs:$src1),
+ (S2_cl1p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vabsdiffw DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vabsdiffw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_andnp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_andnp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_parityp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_parityp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpeq IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddb_map DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpheq DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpheq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clbnorm IntRegs:$src1),
+ (S2_clbnorm IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_tstbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_tstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2),
+ (S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_w2df IntRegs:$src1),
+ (F2_conv_w2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vcnegh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_vcnegh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_xacc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_xor_xacc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpys_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpys_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpys_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpys_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgubr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgubr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl0p DoubleRegs:$src1),
+ (S2_cl0p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3),
+ (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupd IntRegs:$src1, IntRegs:$src2),
+ (F2_sffixupd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct1 IntRegs:$src1),
+ (S2_ct1 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct0 IntRegs:$src1),
+ (S2_ct0 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupr IntRegs:$src1),
+ (F2_sffixupr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmphgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmphgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_andn PredRegs:$src1, PredRegs:$src2),
+ (C2_andn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_max IntRegs:$src1, IntRegs:$src2),
+ (A2_max IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_rr IntRegs:$src1, IntRegs:$src2),
+ (A4_round_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2),
+ (A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineir s32_0ImmPred:$src1, IntRegs:$src2),
+ (A4_combineir s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpeq IntRegs:$src1, IntRegs:$src2),
+ (A4_rcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vzxtbh IntRegs:$src1),
+ (S2_vzxtbh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_sat IntRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_r_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combinew IntRegs:$src1, IntRegs:$src2),
+ (A2_combinew IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsset IntRegs:$src1, IntRegs:$src2),
+ (C4_nbitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_modwrapu IntRegs:$src1, IntRegs:$src2),
+ (A4_modwrapu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpneq IntRegs:$src1, IntRegs:$src2),
+ (A4_rcmpneq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred:$src1),
+ (F2_sfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred:$src1),
+ (F2_sfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_d2sf DoubleRegs:$src1),
+ (F2_conv_d2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2),
+ (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpuo IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpuo IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwh_nopack DoubleRegs:$src1),
+ (S2_vsatwh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_minu IntRegs:$src1, IntRegs:$src2),
+ (A2_minu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_minp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_minp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpge IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpge IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmin IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmin IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpgt IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vpmpyh IntRegs:$src1, IntRegs:$src2),
+ (M4_vpmpyh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_roundsat DoubleRegs:$src1),
+ (A2_roundsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct1p DoubleRegs:$src1),
+ (S2_ct1p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extract_rp IntRegs:$src1, DoubleRegs:$src2),
+ (S4_extract_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2),
+ (C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_tfrcpp CtrRegs64:$src1),
+ (A4_tfrcpp CtrRegs64:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgtu IntRegs:$src1, IntRegs:$src2),
+ (A4_cmphgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubws DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sath IntRegs:$src1),
+ (A2_sath IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satb IntRegs:$src1),
+ (A2_satb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4),
+ (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxaddsubh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxaddsubh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_asrh IntRegs:$src1),
+ (A2_asrh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extractp_rp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_extractp_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_or PredRegs:$src1, PredRegs:$src2),
+ (C2_or PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmacr_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmacr_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_xor IntRegs:$src1, IntRegs:$src2),
+ (A2_xor IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_add IntRegs:$src1, IntRegs:$src2),
+ (A2_add IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsububs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsububs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vraddub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A2_vraddub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfinvsqrta IntRegs:$src1),
+ (F2_sfinvsqrta IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct0p DoubleRegs:$src1),
+ (S2_ct0p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svaddh IntRegs:$src1, IntRegs:$src2),
+ (A2_svaddh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vcrotate DoubleRegs:$src1, IntRegs:$src2),
+ (S2_vcrotate DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_aslh IntRegs:$src1),
+ (A2_aslh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyl_rs1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyl_rs1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplatrh IntRegs:$src1),
+ (S2_vsplatrh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplatrb IntRegs:$src1),
+ (S2_vsplatrb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3),
+ (C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_pxfer_map PredRegs:$src1),
+ (C2_pxfer_map PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3),
+ (M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
+ (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrcrr CtrRegs:$src1),
+ (A2_tfrcrr CtrRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
+ (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_orn PredRegs:$src1, PredRegs:$src2),
+ (C2_orn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmpy IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmpy IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_mask PredRegs:$src1),
+ (C2_mask PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up_s1_sat IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up_s1_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_vcmpbgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vrmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vrsadub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vrsadub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrrcr IntRegs:$src1),
+ (A2_tfrrcr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxb DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsxthw IntRegs:$src1),
+ (S2_vsxthw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgt IntRegs:$src1, IntRegs:$src2),
+ (C2_cmpgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2d_chop DoubleRegs:$src1),
+ (F2_conv_df2d_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2w IntRegs:$src1),
+ (F2_conv_sf2w IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2),
+ (F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3),
+ (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A2_addi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_addp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbpnorm DoubleRegs:$src1),
+ (S4_clbpnorm DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_rr_sat IntRegs:$src1, IntRegs:$src2),
+ (A4_round_rr_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2uw IntRegs:$src1),
+ (F2_conv_sf2uw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2ud IntRegs:$src1),
+ (F2_conv_sf2ud IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpwgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpwgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2uw_chop IntRegs:$src1),
+ (F2_conv_sf2uw_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwuh_nopack DoubleRegs:$src1),
+ (S2_vsatwuh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubuhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubuhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmpybsu IntRegs:$src1, IntRegs:$src2),
+ (M5_vmpybsu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_subp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (A4_subp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_vitpack PredRegs:$src1, PredRegs:$src2),
+ (C2_vitpack PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplicerb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (S2_vsplicerb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsclr IntRegs:$src1, IntRegs:$src2),
+ (C4_nbitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpbgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpbgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpys_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpys_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpuo DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpuo DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffob DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffob DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_and PredRegs:$src1, PredRegs:$src2),
+ (C2_and PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S5_popcountp DoubleRegs:$src1),
+ (S5_popcountp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
+ (S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl0 IntRegs:$src1),
+ (S2_cl0 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpneq IntRegs:$src1, IntRegs:$src2),
+ (C4_cmpneq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmacs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vdmacs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmacs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vdmacs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clb IntRegs:$src1),
+ (S2_clb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrmac_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrmac_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_sat IntRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_r_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2d IntRegs:$src1),
+ (F2_conv_sf2d IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred:$src1),
+ (F2_dfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgt IntRegs:$src1, IntRegs:$src2),
+ (A4_cmphgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred:$src1),
+ (F2_dfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3),
+ (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3),
+ (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxu IntRegs:$src1, IntRegs:$src2),
+ (A2_maxu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_maxp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred:$src2),
+ (A2_andir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfrecipa IntRegs:$src1, IntRegs:$src2),
+ (F2_sfrecipa IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2),
+ (A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_orn IntRegs:$src1, IntRegs:$src2),
+ (A4_orn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2),
+ (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2),
+ (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_or IntRegs:$src1, IntRegs:$src2),
+ (A2_or IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpeq DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpeq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeq IntRegs:$src1, IntRegs:$src2),
+ (C2_cmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrp DoubleRegs:$src1),
+ (A2_tfrp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathub_nopack DoubleRegs:$src1),
+ (S2_vsathub_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satuh IntRegs:$src1),
+ (A2_satuh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satub IntRegs:$src1),
+ (A2_satub IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2),
+ (M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2),
+ (C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2),
+ (A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred:$src2),
+ (A2_tfril IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
+ (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunehb DoubleRegs:$src1),
+ (S2_vtrunehb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabsw DoubleRegs:$src1),
+ (A2_vabsw DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabsh DoubleRegs:$src1),
+ (A2_vabsh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfsub IntRegs:$src1, IntRegs:$src2),
+ (F2_sfsub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3),
+ (C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_swiz IntRegs:$src1),
+ (A2_swiz IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vraddub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vraddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_tlbmatch DoubleRegs:$src1, IntRegs:$src2),
+ (A4_tlbmatch DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2w_chop DoubleRegs:$src1),
+ (F2_conv_df2w_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_and IntRegs:$src1, IntRegs:$src2),
+ (A2_and IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
+ (S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_ud2sf DoubleRegs:$src1),
+ (F2_conv_ud2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfr IntRegs:$src1),
+ (A2_tfr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subri s32_0ImmPred:$src1, IntRegs:$src2),
+ (A2_subri s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmpybuu IntRegs:$src1, IntRegs:$src2),
+ (M5_vmpybuu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_brev IntRegs:$src1),
+ (S2_brev IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrndpackwhs DoubleRegs:$src1),
+ (S2_vrndpackwhs DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_uw2df IntRegs:$src1),
+ (F2_conv_uw2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred:$src2),
+ (A2_orir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_andp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_andp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_min IntRegs:$src1, IntRegs:$src2),
+ (A2_min IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2),
+ (M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_svw_trun DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_svw_trun DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2df IntRegs:$src1),
+ (F2_conv_sf2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunohb DoubleRegs:$src1),
+ (S2_vtrunohb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2d_chop IntRegs:$src1),
+ (F2_conv_sf2d_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2w DoubleRegs:$src1),
+ (F2_conv_df2w DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2d DoubleRegs:$src1),
+ (F2_conv_df2d DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svadduhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svadduhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2w_chop IntRegs:$src1),
+ (F2_conv_sf2w_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_svsathub IntRegs:$src1),
+ (S2_svsathub IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_setbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_setbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4),
+ (F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2ud DoubleRegs:$src1),
+ (F2_conv_df2ud DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2uw DoubleRegs:$src1),
+ (F2_conv_df2uw DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2),
+ (C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplteu IntRegs:$src1, IntRegs:$src2),
+ (C4_cmplteu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubb_map DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_minup DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_minup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_valignrb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (S2_valignrb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmaci_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmaci_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vdmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vdmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svaddhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svaddhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyr_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyr_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminb DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmac_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vcmac_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_lsli s6_0ImmPred:$src1, IntRegs:$src2),
+ (S4_lsli s6_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyeh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyeh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_tfrrp IntRegs:$src1),
+ (C2_tfrrp IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunowh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_vtrunowh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_abs IntRegs:$src1),
+ (A2_abs IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbeq IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpbeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_negp DoubleRegs:$src1),
+ (A2_negp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwuh DoubleRegs:$src1),
+ (S2_vsatwuh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_svsathb IntRegs:$src1),
+ (S2_svsathb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2),
+ (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cround_rr IntRegs:$src1, IntRegs:$src2),
+ (A4_cround_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffeb DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffeb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminuw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminuw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3),
+ (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
+ (S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubh IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2),
+ (S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsxtbh IntRegs:$src1),
+ (S2_vsxtbh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_subp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2es_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vmpy2es_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2es_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vmpy2es_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_parity IntRegs:$src1, IntRegs:$src2),
+ (S4_parity IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2ud_chop IntRegs:$src1),
+ (F2_conv_sf2ud_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_nac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_nac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavghcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_andn IntRegs:$src1, IntRegs:$src2),
+ (A4_andn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrndpackwh DoubleRegs:$src1),
+ (S2_vrndpackwh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmac_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vcmac_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxuw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxuw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsclr IntRegs:$src1, IntRegs:$src2),
+ (C2_bitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
+ (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_ornp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_ornp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2),
+ (C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+
+// V55 Scalar Instructions.
+
+def: Pat<(int_hexagon_A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV55]>;
+
+// V60 Scalar Instructions.
+
+def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+
+// V62 Scalar Instructions.
+
+def: Pat<(int_hexagon_S6_vtrunehb_ppp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S6_vtrunehb_ppp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_V6_ldntnt0 IntRegs:$src1),
+ (V6_ldntnt0 IntRegs:$src1)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_M6_vabsdiffub DoubleRegs:$src1, DoubleRegs:$src2),
+ (M6_vabsdiffub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_S6_vtrunohb_ppp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S6_vtrunohb_ppp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_M6_vabsdiffb DoubleRegs:$src1, DoubleRegs:$src2),
+ (M6_vabsdiffb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_A6_vminub_RdP DoubleRegs:$src1, DoubleRegs:$src2),
+ (A6_vminub_RdP DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_S6_vsplatrbp IntRegs:$src1),
+ (S6_vsplatrbp IntRegs:$src1)>, Requires<[HasV62]>;
+
+// V65 Scalar Instructions.
+
+def: Pat<(int_hexagon_A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2),
+ (A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV65]>;
+
+// V66 Scalar Instructions.
+
+def: Pat<(int_hexagon_F2_dfsub DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfsub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2),
+ (S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2)>, Requires<[HasV66]>;
+
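(Illustrative sketch only, not taken from the patch: the scalar patterns above are what let clang's corresponding Hexagon built-ins select these instructions during instruction selection. A minimal C example, assuming clang's __builtin_HEXAGON_* builtin naming and a V66 target such as --target=hexagon -mv66; the wrapper name add_doubles is made up for illustration.)

  /* The int_hexagon_F2_dfadd intrinsic is selected into the F2_dfadd
     instruction through the HasV66 pattern shown above. */
  double add_doubles(double a, double b) {
    return __builtin_HEXAGON_F2_dfadd(a, b);
  }
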
+// V60 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminub HvxVR:$src1, HvxVR:$src2),
+ (V6_vminub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslw_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhvsrs_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsathub HvxVR:$src1, HvxVR:$src2),
+ (V6_vsathub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsathub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsathub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddh_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddh_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgw HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgh HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgub HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubb HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgubrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgubrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgubrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgubrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundhb HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundhb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabus_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmux_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhus HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhus HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhus_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhus HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavghrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavghrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavghrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavghrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtran2x2_map_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdelta HvxVR:$src1, HvxVR:$src2),
+ (V6_vdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdelta_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackob HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackob_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrw_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_or HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_or_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lo HvxWR:$src1),
+ (V6_lo HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lo_128B HvxWR:$src1),
+ (V6_lo HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubb_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubb_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldu0 IntRegs:$src1),
+ (V6_ldu0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldu0_128B IntRegs:$src1),
+ (V6_ldu0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgh HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlalignb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsh HvxVR:$src1),
+ (V6_vsh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsh_128B HvxVR:$src1),
+ (V6_vsh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_and_n HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_and_n_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsb HvxVR:$src1),
+ (V6_vsb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsb_128B HvxVR:$src1),
+ (V6_vsb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffh HvxVR:$src1),
+ (V6_vshuffh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffh_128B HvxVR:$src1),
+ (V6_vshuffh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgub HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnormamth HvxVR:$src1),
+ (V6_vnormamth HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnormamth_128B HvxVR:$src1),
+ (V6_vnormamth HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguh HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vzh HvxVR:$src1),
+ (V6_vzh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vzh_128B HvxVR:$src1),
+ (V6_vzh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxub HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh HvxVR:$src1, HvxVR:$src2),
+ (V6_veqh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuuv HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabuuv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuuv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabuuv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vminuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vror HvxVR:$src1, IntRegs:$src2),
+ (V6_vror HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vror_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vror HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_sacc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsh_sat HvxVR:$src1),
+ (V6_vabsh_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsh_sat_128B HvxVR:$src1),
+ (V6_vabsh_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_or_n HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_or_n_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealb HvxVR:$src1),
+ (V6_vdealb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealb_128B HvxVR:$src1),
+ (V6_vdealb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybusv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vzb HvxVR:$src1),
+ (V6_vzb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vzb_128B HvxVR:$src1),
+ (V6_vzb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddb HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackhub_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyb HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabusv HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabusv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabusv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabusv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_and HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_and_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackwuh_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vswap_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslw HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackhb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyih_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffvdd_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddb_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddb_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackub HvxVR:$src1),
+ (V6_vunpackub HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackub_128B HvxVR:$src1),
+ (V6_vunpackub HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyieoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyieoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyieoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyieoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_extractw HvxVR:$src1, IntRegs:$src2),
+ (V6_extractw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_extractw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_extractw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgwrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgwrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyub HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackob HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackob HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackob_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackob HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahb HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpahb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpahb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandqrt HvxQR:$src1, IntRegs:$src2),
+ (V6_vandqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandqrt_128B HvxQR:$src1, IntRegs:$src2),
+ (V6_vandqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vxor HvxVR:$src1, HvxVR:$src2),
+ (V6_vxor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vxor_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vxor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhsat_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealb4w HvxVR:$src1, HvxVR:$src2),
+ (V6_vdealb4w HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealb4w_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdealb4w HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_sacc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffh HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffob HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffob_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyub_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnormamtw HvxVR:$src1),
+ (V6_vnormamtw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnormamtw_128B HvxVR:$src1),
+ (V6_vnormamtw HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackuh HvxVR:$src1),
+ (V6_vunpackuh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackuh_128B HvxVR:$src1),
+ (V6_vunpackuh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackoh HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackoh HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackoh_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackoh HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyubv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhss HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhss HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhss_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhss HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_hi HvxWR:$src1),
+ (V6_hi HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_hi_128B HvxWR:$src1),
+ (V6_hi HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw HvxVR:$src1, HvxVR:$src2),
+ (V6_veqw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdsaduh HvxWR:$src1, IntRegs:$src2),
+ (V6_vdsaduh HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdsaduh HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubw_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubw_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyih HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyih HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyih_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyih HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybus_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsw HvxVR:$src1),
+ (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsw_128B HvxVR:$src1),
+ (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrw HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsh HvxVR:$src1),
+ (V6_vabsh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsh_128B HvxVR:$src1),
+ (V6_vabsh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrh HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valignb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcombine HvxVR:$src1, HvxVR:$src2),
+ (V6_vcombine HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcombine_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vcombine HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandqrt_acc_128B HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vinsertwr HvxVR:$src1, IntRegs:$src2),
+ (V6_vinsertwr HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vinsertwr_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vinsertwr HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubh_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubh_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffb HvxVR:$src1),
+ (V6_vshuffb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffb_128B HvxVR:$src1),
+ (V6_vshuffb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vand HvxVR:$src1, HvxVR:$src2),
+ (V6_vand HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vand_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vand HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_acc_128B HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcl0h HvxVR:$src1),
+ (V6_vcl0h HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcl0h_128B HvxVR:$src1),
+ (V6_vcl0h HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhus_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhsrs_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwh_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ld0 IntRegs:$src1),
+ (V6_ld0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ld0_128B IntRegs:$src1),
+ (V6_ld0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpopcounth HvxVR:$src1),
+ (V6_vpopcounth HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpopcounth_128B HvxVR:$src1),
+ (V6_vpopcounth HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldnt0 IntRegs:$src1),
+ (V6_ldnt0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldnt0_128B IntRegs:$src1),
+ (V6_ldnt0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminh HvxVR:$src1, HvxVR:$src2),
+ (V6_vminh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2 IntRegs:$src1),
+ (V6_pred_scalar2 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2_128B IntRegs:$src1),
+ (V6_pred_scalar2 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealh HvxVR:$src1),
+ (V6_vdealh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealh_128B HvxVR:$src1),
+ (V6_vdealh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackwh_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslh HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vor HvxVR:$src1, HvxVR:$src2),
+ (V6_vor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vor_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiowh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiowh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvrt HvxVR:$src1, IntRegs:$src2),
+ (V6_vandvrt HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvrt_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vandvrt HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcl0w HvxVR:$src1),
+ (V6_vcl0w HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcl0w_128B HvxVR:$src1),
+ (V6_vcl0w HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyihb HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyihb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyihb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyihb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpybus HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpybus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpybus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vd0 ),
+ (V6_vd0 )>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vd0_128B ),
+ (V6_vd0 )>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvrt_acc_128B HvxQR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vassign HvxVR:$src1),
+ (V6_vassign HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vassign_128B HvxVR:$src1),
+ (V6_vassign HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackb HvxVR:$src1),
+ (V6_vunpackb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackb_128B HvxVR:$src1),
+ (V6_vunpackb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackh HvxVR:$src1),
+ (V6_vunpackh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackh_128B HvxVR:$src1),
+ (V6_vunpackh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatwh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatwh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyihb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrdelta HvxVR:$src1, HvxVR:$src2),
+ (V6_vrdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrdelta_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundwh HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundwh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddw_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddw_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffub HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth HvxVR:$src1, HvxVR:$src2),
+ (V6_vgth HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgth HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnot HvxVR:$src1),
+ (V6_vnot HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnot_128B HvxVR:$src1),
+ (V6_vnot HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxw HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsw_sat HvxVR:$src1),
+ (V6_vabsw_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsw_sat_128B HvxVR:$src1),
+ (V6_vabsw_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundhub HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundhub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_acc_128B HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabus HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabus_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vassignp HvxWR:$src1),
+ (V6_vassignp HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vassignp_128B HvxWR:$src1),
+ (V6_vassignp HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb HvxVR:$src1, HvxVR:$src2),
+ (V6_veqb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplatw IntRegs:$src1),
+ (V6_lvsplatw IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplatw_128B IntRegs:$src1),
+ (V6_lvsplatw IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_not HvxQR:$src1),
+ (V6_pred_not HvxQR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_not_128B HvxQR:$src1),
+ (V6_pred_not HvxQR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewh_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealvdd_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgw HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminw HvxVR:$src1, HvxVR:$src2),
+ (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_xor HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_xor HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_xor_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_xor HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguhrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguhrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguhrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguhrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrw HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrh HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyuhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyuhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhbrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffw HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+
+// V62 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandnqrt_acc_128B HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddclbh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddclbh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_64_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_64_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatuwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatuwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatuwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatuwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_shuffeqh HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqh HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_shuffeqh_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqh HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_shuffeqw HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqw HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_shuffeqw_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqw HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcnpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubcarry_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhbsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminb HvxVR:$src1, HvxVR:$src2),
+ (V6_vminb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhw_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrb HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddububb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvnqv HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvnqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvnqv_128B HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvnqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplatb IntRegs:$src1),
+ (V6_lvsplatb IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplatb_128B IntRegs:$src1),
+ (V6_lvsplatb IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplath IntRegs:$src1),
+ (V6_lvsplath IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplath_128B IntRegs:$src1),
+ (V6_lvsplath IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_nm_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldnpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhb HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpauhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpauhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtnp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtnp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrounduhub HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrounduhub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhw_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtnpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandnqrt HvxQR:$src1, IntRegs:$src2),
+ (V6_vandnqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandnqrt_128B HvxQR:$src1, IntRegs:$src2),
+ (V6_vandnqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxb HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvqv HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvqv_128B HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddcarry_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldnp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldnp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrounduwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrounduwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_nm_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2v2 IntRegs:$src1),
+ (V6_pred_scalar2v2 IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2v2_128B IntRegs:$src1),
+ (V6_pred_scalar2v2 IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubh_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddclbw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddclbw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubububb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcnp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcnp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+
+// V65 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_128B HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguwrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguwrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgb HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgb HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguw HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlut4 HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlut4_128B HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_128B HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpsuhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqw HvxQR:$src1),
+ (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqw_128B HvxQR:$src1),
+ (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqh HvxQR:$src1),
+ (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqh_128B HvxQR:$src1),
+ (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqb HvxQR:$src1),
+ (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqb_128B HvxQR:$src1),
+ (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsb HvxVR:$src1),
+ (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsb_128B HvxVR:$src1),
+ (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgbrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgbrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdd0 ),
+ (V6_vdd0 )>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdd0_128B ),
+ (V6_vdd0 )>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuu HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsb_sat HvxVR:$src1),
+ (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsb_sat_128B HvxVR:$src1),
+ (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+
+// V66 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddcarrysat_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasr_into_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatdw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatdw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatdw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatdw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrotr HvxVR:$src1, HvxVR:$src2),
+ (V6_vrotr HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrotr_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrotr HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX128B]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td
index 03c504ff0b08..b3132d41b903 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td
@@ -1,4 +1,4 @@
-//===- HexagonDepMappings.td ----------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,6 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
def A2_negAlias : InstAlias<"$Rd32 = neg($Rs32)", (A2_subri IntRegs:$Rd32, 0, IntRegs:$Rs32)>;
def A2_notAlias : InstAlias<"$Rd32 = not($Rs32)", (A2_subri IntRegs:$Rd32, -1, IntRegs:$Rs32)>;
def A2_tfrfAlias : InstAlias<"if (!$Pu4) $Rd32 = $Rs32", (A2_paddif IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
@@ -252,6 +251,7 @@ def V6_vaslhv_altAlias : InstAlias<"$Vd32 = vaslh($Vu32,$Vv32)", (V6_vaslhv HvxV
def V6_vaslw_acc_altAlias : InstAlias<"$Vx32 += vaslw($Vu32,$Rt32)", (V6_vaslw_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vaslw_altAlias : InstAlias<"$Vd32 = vaslw($Vu32,$Rt32)", (V6_vaslw HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vaslwv_altAlias : InstAlias<"$Vd32 = vaslw($Vu32,$Vv32)", (V6_vaslwv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasr_into_altAlias : InstAlias<"$Vxx32 = vasrinto($Vu32,$Vv32)", (V6_vasr_into HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vasrh_acc_altAlias : InstAlias<"$Vx32 += vasrh($Vu32,$Rt32)", (V6_vasrh_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vasrh_altAlias : InstAlias<"$Vd32 = vasrh($Vu32,$Rt32)", (V6_vasrh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vasrhbrndsat_altAlias : InstAlias<"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhbrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>;
@@ -402,6 +402,7 @@ def V6_vrmpyubi_acc_altAlias : InstAlias<"$Vxx32 += vrmpyub($Vuu32,$Rt32,#$Ii)",
def V6_vrmpyubi_altAlias : InstAlias<"$Vdd32 = vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
def V6_vrmpyubv_acc_altAlias : InstAlias<"$Vx32 += vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vrmpyubv_altAlias : InstAlias<"$Vd32 = vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrotr_altAlias : InstAlias<"$Vd32 = vrotr($Vu32,$Vv32)", (V6_vrotr HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vroundhb_altAlias : InstAlias<"$Vd32 = vroundhb($Vu32,$Vv32):sat", (V6_vroundhb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vroundhub_altAlias : InstAlias<"$Vd32 = vroundhub($Vu32,$Vv32):sat", (V6_vroundhub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vrounduhub_altAlias : InstAlias<"$Vd32 = vrounduhub($Vu32,$Vv32):sat", (V6_vrounduhub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
@@ -473,4 +474,6 @@ def V6_vunpackub_altAlias : InstAlias<"$Vdd32 = vunpackub($Vu32)", (V6_vunpackub
def V6_vunpackuh_altAlias : InstAlias<"$Vdd32 = vunpackuh($Vu32)", (V6_vunpackuh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>;
def V6_vzb_altAlias : InstAlias<"$Vdd32 = vzxtb($Vu32)", (V6_vzb HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>;
def V6_vzh_altAlias : InstAlias<"$Vdd32 = vzxth($Vu32)", (V6_vzh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>;
+def V6_zld0Alias : InstAlias<"z = vmem($Rt32)", (V6_zLd_ai IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_zldp0Alias : InstAlias<"if ($Pv4) z = vmem($Rt32)", (V6_zLd_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
def Y2_dcfetchAlias : InstAlias<"dcfetch($Rs32)", (Y2_dcfetchbo IntRegs:$Rs32, 0)>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td
index 9d960953f8f5..ef2d4fa45702 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td
@@ -1,4 +1,4 @@
-//===- HexagonDepOperands.td ----------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,16 +9,12 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
-def s10_6ImmOperand : AsmOperandClass { let Name = "s10_6Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s10_6Imm : Operand<i32> { let ParserMatchClass = s10_6ImmOperand; let DecoderMethod = "s10_6ImmDecoder"; }
-def s10_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<10, 6>(N->getSExtValue());}]>;
def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
@@ -130,6 +126,3 @@ def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtVal
def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
-def s10_0ImmOperand : AsmOperandClass { let Name = "s10_0Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s10_0Imm : Operand<i32> { let ParserMatchClass = s10_0ImmOperand; let DecoderMethod = "s10_0ImmDecoder"; }
-def s10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<10, 0>(N->getSExtValue());}]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
index 656c83f2d0c4..0fd55e8b7997 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
@@ -1,4 +1,4 @@
-//===- HexagonDepTimingClasses.h ------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,7 +10,6 @@
//===----------------------------------------------------------------------===//
-
#ifndef TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H
#define TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H
@@ -20,19 +19,25 @@ namespace llvm {
inline bool is_TC3x(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_16d0d8d5:
- case Hexagon::Sched::tc_1853ea6d:
- case Hexagon::Sched::tc_60571023:
- case Hexagon::Sched::tc_7934b9df:
- case Hexagon::Sched::tc_8fd5f294:
- case Hexagon::Sched::tc_b9c0b731:
- case Hexagon::Sched::tc_bcc96cee:
- case Hexagon::Sched::tc_c6ce9b3f:
- case Hexagon::Sched::tc_c6ebf8dd:
- case Hexagon::Sched::tc_c82dc1ff:
- case Hexagon::Sched::tc_caaebcba:
- case Hexagon::Sched::tc_cf59f215:
- case Hexagon::Sched::tc_e913dc32:
+ case Hexagon::Sched::tc_05d3a09b:
+ case Hexagon::Sched::tc_0d8f5752:
+ case Hexagon::Sched::tc_13bfbcf9:
+ case Hexagon::Sched::tc_174516e8:
+ case Hexagon::Sched::tc_1a2fd869:
+ case Hexagon::Sched::tc_1c4528a2:
+ case Hexagon::Sched::tc_32779c6f:
+ case Hexagon::Sched::tc_5b54b33f:
+ case Hexagon::Sched::tc_6b25e783:
+ case Hexagon::Sched::tc_76851da1:
+ case Hexagon::Sched::tc_9debc299:
+ case Hexagon::Sched::tc_a9d88b22:
+ case Hexagon::Sched::tc_bafaade3:
+ case Hexagon::Sched::tc_bcf98408:
+ case Hexagon::Sched::tc_bdceeac1:
+ case Hexagon::Sched::tc_c8ce0b5c:
+ case Hexagon::Sched::tc_d1aa9eaa:
+ case Hexagon::Sched::tc_d773585a:
+ case Hexagon::Sched::tc_df3319ed:
return true;
default:
return false;
@@ -41,8 +46,8 @@ inline bool is_TC3x(unsigned SchedClass) {
inline bool is_TC2early(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_14cd4cfa:
- case Hexagon::Sched::tc_2a160009:
+ case Hexagon::Sched::tc_b4407292:
+ case Hexagon::Sched::tc_fc3999b4:
return true;
default:
return false;
@@ -51,12 +56,13 @@ inline bool is_TC2early(unsigned SchedClass) {
inline bool is_TC4x(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_038a1342:
- case Hexagon::Sched::tc_4d99bca9:
- case Hexagon::Sched::tc_6792d5ff:
- case Hexagon::Sched::tc_9c00ce8d:
- case Hexagon::Sched::tc_d580173f:
- case Hexagon::Sched::tc_f3eaa14b:
+ case Hexagon::Sched::tc_2f7c551d:
+ case Hexagon::Sched::tc_2ff964b4:
+ case Hexagon::Sched::tc_3a867367:
+ case Hexagon::Sched::tc_3b470976:
+ case Hexagon::Sched::tc_4560740b:
+ case Hexagon::Sched::tc_a58fd5cc:
+ case Hexagon::Sched::tc_b8bffe55:
return true;
default:
return false;
@@ -65,23 +71,27 @@ inline bool is_TC4x(unsigned SchedClass) {
inline bool is_TC2(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_00afc57e:
- case Hexagon::Sched::tc_1b9c9ee5:
- case Hexagon::Sched::tc_234a11a5:
- case Hexagon::Sched::tc_2b6f77c6:
- case Hexagon::Sched::tc_41d5298e:
- case Hexagon::Sched::tc_5ba5997d:
- case Hexagon::Sched::tc_84df2cd3:
- case Hexagon::Sched::tc_87735c3b:
- case Hexagon::Sched::tc_897d1a9d:
- case Hexagon::Sched::tc_976ddc4f:
- case Hexagon::Sched::tc_b44c6e2a:
- case Hexagon::Sched::tc_b9c4623f:
- case Hexagon::Sched::tc_c2f7d806:
- case Hexagon::Sched::tc_c74f796f:
- case Hexagon::Sched::tc_d088982c:
- case Hexagon::Sched::tc_ef84f62f:
- case Hexagon::Sched::tc_f49e76f4:
+ case Hexagon::Sched::tc_002cb246:
+ case Hexagon::Sched::tc_14b5c689:
+ case Hexagon::Sched::tc_1c80410a:
+ case Hexagon::Sched::tc_4414d8b1:
+ case Hexagon::Sched::tc_6132ba3d:
+ case Hexagon::Sched::tc_61830035:
+ case Hexagon::Sched::tc_679309b8:
+ case Hexagon::Sched::tc_703e822c:
+ case Hexagon::Sched::tc_779080bf:
+ case Hexagon::Sched::tc_784490da:
+ case Hexagon::Sched::tc_88b4f13d:
+ case Hexagon::Sched::tc_9461ff31:
+ case Hexagon::Sched::tc_9e313203:
+ case Hexagon::Sched::tc_a813cf9a:
+ case Hexagon::Sched::tc_bfec0f01:
+ case Hexagon::Sched::tc_cf8126ae:
+ case Hexagon::Sched::tc_d08ee0f4:
+ case Hexagon::Sched::tc_e4a7f9f0:
+ case Hexagon::Sched::tc_f429765c:
+ case Hexagon::Sched::tc_f675fee8:
+ case Hexagon::Sched::tc_f9058dd7:
return true;
default:
return false;
@@ -90,45 +100,43 @@ inline bool is_TC2(unsigned SchedClass) {
inline bool is_TC1(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_181af5d0:
- case Hexagon::Sched::tc_1b82a277:
- case Hexagon::Sched::tc_1e856f58:
- case Hexagon::Sched::tc_351fed2d:
- case Hexagon::Sched::tc_3669266a:
- case Hexagon::Sched::tc_3cb8ea06:
- case Hexagon::Sched::tc_452f85af:
- case Hexagon::Sched::tc_481e5e5c:
- case Hexagon::Sched::tc_49eb22c8:
- case Hexagon::Sched::tc_523fcf30:
- case Hexagon::Sched::tc_52d7bbea:
- case Hexagon::Sched::tc_53bc8a6a:
- case Hexagon::Sched::tc_540fdfbc:
- case Hexagon::Sched::tc_55050d58:
- case Hexagon::Sched::tc_609d2efe:
- case Hexagon::Sched::tc_68cb12ce:
- case Hexagon::Sched::tc_6ebb4a12:
- case Hexagon::Sched::tc_6efc556e:
- case Hexagon::Sched::tc_73043bf4:
- case Hexagon::Sched::tc_7a830544:
- case Hexagon::Sched::tc_855b0b61:
- case Hexagon::Sched::tc_8fe6b782:
- case Hexagon::Sched::tc_90f3e30c:
- case Hexagon::Sched::tc_97743097:
- case Hexagon::Sched::tc_99be14ca:
- case Hexagon::Sched::tc_9faf76ae:
- case Hexagon::Sched::tc_a46f0df5:
- case Hexagon::Sched::tc_a904d137:
- case Hexagon::Sched::tc_b9488031:
- case Hexagon::Sched::tc_be706f30:
- case Hexagon::Sched::tc_c6aa82f7:
- case Hexagon::Sched::tc_cde8b071:
- case Hexagon::Sched::tc_d6bf0472:
- case Hexagon::Sched::tc_dbdffe3d:
- case Hexagon::Sched::tc_e0739b8c:
- case Hexagon::Sched::tc_e1e99bfa:
- case Hexagon::Sched::tc_e9fae2d6:
- case Hexagon::Sched::tc_f2704b9a:
- case Hexagon::Sched::tc_f8eeed7a:
+ case Hexagon::Sched::tc_0663f615:
+ case Hexagon::Sched::tc_0a705168:
+ case Hexagon::Sched::tc_0ae0825c:
+ case Hexagon::Sched::tc_1b6f7cec:
+ case Hexagon::Sched::tc_1fc97744:
+ case Hexagon::Sched::tc_20cdee80:
+ case Hexagon::Sched::tc_2332b92e:
+ case Hexagon::Sched::tc_2eabeebe:
+ case Hexagon::Sched::tc_3d495a39:
+ case Hexagon::Sched::tc_4c5ba658:
+ case Hexagon::Sched::tc_56336eb0:
+ case Hexagon::Sched::tc_56f114f4:
+ case Hexagon::Sched::tc_57890846:
+ case Hexagon::Sched::tc_5a2711e5:
+ case Hexagon::Sched::tc_5b7c0967:
+ case Hexagon::Sched::tc_640086b5:
+ case Hexagon::Sched::tc_643b4717:
+ case Hexagon::Sched::tc_85c9c08f:
+ case Hexagon::Sched::tc_85d5d03f:
+ case Hexagon::Sched::tc_862b3e70:
+ case Hexagon::Sched::tc_946df596:
+ case Hexagon::Sched::tc_9c3ecd83:
+ case Hexagon::Sched::tc_9fc3dae0:
+ case Hexagon::Sched::tc_a1123dda:
+ case Hexagon::Sched::tc_a1c00888:
+ case Hexagon::Sched::tc_ae53734a:
+ case Hexagon::Sched::tc_b31c2e97:
+ case Hexagon::Sched::tc_b4b5c03a:
+ case Hexagon::Sched::tc_b51dc29a:
+ case Hexagon::Sched::tc_cd374165:
+ case Hexagon::Sched::tc_cfd8378a:
+ case Hexagon::Sched::tc_d5b7b0c1:
+ case Hexagon::Sched::tc_d9d43ecb:
+ case Hexagon::Sched::tc_db2bce9c:
+ case Hexagon::Sched::tc_de4df740:
+ case Hexagon::Sched::tc_de554571:
+ case Hexagon::Sched::tc_e78647bd:
return true;
default:
return false;
@@ -136,4 +144,4 @@ inline bool is_TC1(unsigned SchedClass) {
}
} // namespace llvm
-#endif
+#endif
\ No newline at end of file
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 557e6384be6a..8e2f5093038e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -731,9 +731,7 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
MIB.add(MO);
// Set memory references.
- MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(*MI);
MI->eraseFromParent();
return;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 7e774674e0c0..1a762c0c9de7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -891,14 +891,7 @@ void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
MB.add(MO);
Ox++;
}
-
- MachineFunction &MF = *B.getParent();
- MachineInstr::mmo_iterator I = MI.memoperands_begin();
- unsigned NR = std::distance(I, MI.memoperands_end());
- MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR);
- for (unsigned i = 0; i < NR; ++i)
- MemRefs[i] = *I++;
- MB.setMemRefs(MemRefs, MemRefs+NR);
+ MB.cloneMemRefs(MI);
MachineInstr *NewI = MB;
NewI->clearKillInfo();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 97b02e2b34cb..f5736546a87c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -550,6 +550,37 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+/// Returns true if the target can safely skip saving callee-saved registers
+/// for noreturn nounwind functions.
+bool HexagonFrameLowering::enableCalleeSaveSkip(
+ const MachineFunction &MF) const {
+ const auto &F = MF.getFunction();
+ assert(F.hasFnAttribute(Attribute::NoReturn) &&
+ F.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+ !F.getFunction().hasFnAttribute(Attribute::UWTable));
+ (void)F;
+
+ // No need to save callee saved registers if the function does not return.
+ return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim();
+}
+
+// Helper function used to determine when to eliminate the stack frame for
+// functions marked as noreturn and when the noreturn-stack-elim option is
+// specified. When both these conditions are true, then an FP may not be needed
+// if the function makes a call. It is very similar to enableCalleeSaveSkip,
+// but it is used to check if the allocframe can be eliminated as well.
+static bool enableAllocFrameElim(const MachineFunction &MF) {
+ const auto &F = MF.getFunction();
+ const auto &MFI = MF.getFrameInfo();
+ const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ assert(!MFI.hasVarSizedObjects() &&
+ !HST.getRegisterInfo()->needsStackRealignment(MF));
+ return F.hasFnAttribute(Attribute::NoReturn) &&
+ F.hasFnAttribute(Attribute::NoUnwind) &&
+ !F.hasFnAttribute(Attribute::UWTable) && HST.noreturnStackElim() &&
+ MFI.getStackSize() == 0;
+}
+
void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
bool PrologueStubs) const {
MachineFunction &MF = *MBB.getParent();
@@ -994,7 +1025,7 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
}
const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
- if (MFI.hasCalls() || HMFI.hasClobberLR())
+ if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR())
return true;
return false;
@@ -1266,7 +1297,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
// Call spill function.
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
- : MBB.getLastNonDebugInstr()->getDebugLoc();
+ : MBB.findDebugLoc(MBB.end());
MachineInstr *DeallocCall = nullptr;
if (HasTC) {
@@ -1579,10 +1610,10 @@ bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
// S2_storeri_io FI, 0, TmpR
BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(TmpR, RegState::Kill)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(TmpR, RegState::Kill)
+ .cloneMemRefs(*MI);
NewRegs.push_back(TmpR);
B.erase(It);
@@ -1604,9 +1635,9 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
// TmpR = L2_loadri_io FI, 0
unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
- .addFrameIndex(FI)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .cloneMemRefs(*MI);
// DstR = C2_tfrrp TmpR if DstR is a predicate register
// DstR = A2_tfrrcr TmpR if DstR is a modifier register
@@ -1708,7 +1739,7 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
// register that is entirely undefined.
LivePhysRegs LPR(HRI);
LPR.addLiveIns(B);
- SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>,2> Clobbers;
for (auto R = B.begin(); R != It; ++R) {
Clobbers.clear();
LPR.stepForward(*R, Clobbers);
@@ -1731,10 +1762,10 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
: Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(SrcLo, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(SrcLo, getKillRegState(IsKill))
+ .cloneMemRefs(*MI);
}
// Store high part.
@@ -1742,10 +1773,10 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
StoreOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vS32b_ai
: Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(Size)
- .addReg(SrcHi, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(Size)
+ .addReg(SrcHi, getKillRegState(IsKill))
+ .cloneMemRefs(*MI);
}
B.erase(It);
@@ -1777,17 +1808,17 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
: Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
- .addFrameIndex(FI)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .cloneMemRefs(*MI);
// Load high part.
LoadOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vL32b_ai
: Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
- .addFrameIndex(FI)
- .addImm(Size)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(Size)
+ .cloneMemRefs(*MI);
B.erase(It);
return true;
@@ -1813,10 +1844,10 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
: Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(SrcR, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(SrcR, getKillRegState(IsKill))
+ .cloneMemRefs(*MI);
B.erase(It);
return true;
@@ -1841,9 +1872,9 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
: Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
- .addFrameIndex(FI)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .cloneMemRefs(*MI);
B.erase(It);
return true;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 988718860c5b..d65d870750f8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -41,6 +41,8 @@ public:
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
override {}
+ bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp
deleted file mode 100644
index 63ec9c3d3124..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-//===- HexagonGatherPacketize.cpp -----------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This pass ensures that producer and consumer of VTMP are paired in a bundle.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "gather-packetize"
-
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-cl::opt<bool> EnableGatherPacketize(
- "hexagon-enable-gather-packetize", cl::Hidden, cl::init(true),
- cl::desc("Generate gather packets before packetization"));
-
-namespace llvm {
-FunctionPass *createHexagonGatherPacketize();
-void initializeHexagonGatherPacketizePass(PassRegistry &);
-}
-
-namespace {
-class HexagonGatherPacketize : public MachineFunctionPass {
-public:
- static char ID;
- HexagonGatherPacketize() : MachineFunctionPass(ID) {
- PassRegistry &Registry = *PassRegistry::getPassRegistry();
- initializeHexagonGatherPacketizePass(Registry);
- }
-
- StringRef getPassName() const override {
- return "Hexagon Gather Packetize Code";
- }
- bool runOnMachineFunction(MachineFunction &Fn) override;
-};
-
-char HexagonGatherPacketize::ID = 0;
-
-static inline bool isVtmpDef(const MachineInstr &MI) {
- for (const MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isDef() && MO.isImplicit() &&
- (MO.getReg() == Hexagon::VTMP)) {
- return true;
- }
- return false;
-}
-
-static inline bool isVtmpUse(const MachineInstr &MI) {
- return (MI.mayStore() && (MI.getOperand(2)).isReg() &&
- ((MI.getOperand(2)).getReg() == Hexagon::VTMP));
-}
-
-bool HexagonGatherPacketize::runOnMachineFunction(MachineFunction &Fn) {
- if (!EnableGatherPacketize)
- return false;
- auto &ST = Fn.getSubtarget<HexagonSubtarget>();
- bool HasV65 = ST.hasV65Ops();
- bool UseHVX = ST.useHVXOps();
- if (!(HasV65 & UseHVX))
- return false;
-
- for (auto &MBB : Fn) {
- bool VtmpDef = false;
- MachineBasicBlock::iterator MII, MIE, DefMII;
- for (MII = MBB.begin(), MIE = MBB.end(); MII != MIE; ++MII) {
- MachineInstr &MI = *MII;
- if (VtmpDef) {
- if (!isVtmpUse(MI))
- continue;
- MBB.splice(std::next(DefMII), &MBB, MII);
- finalizeBundle(MBB, DefMII.getInstrIterator(),
- std::next(MII).getInstrIterator());
- VtmpDef = false;
- continue;
- }
- if (!(isVtmpDef(MI)))
- continue;
- VtmpDef = true;
- DefMII = MII;
- }
- assert(!VtmpDef && "VTMP producer and consumer not in same block");
- }
- return true;
-}
-}
-
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
-
-INITIALIZE_PASS(HexagonGatherPacketize, "hexagon-gather-packetize",
- "Hexagon gather packetize Code", false, false)
-
-FunctionPass *llvm::createHexagonGatherPacketize() {
- return new HexagonGatherPacketize();
-}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 2582a021e956..e3492e7374e9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -632,7 +632,7 @@ void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
SortableVectorType VRs;
for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I)
VRs.push_back(I->first);
- llvm::sort(VRs.begin(), VRs.end(), LexCmp);
+ llvm::sort(VRs, LexCmp);
// Transfer the results to the outgoing register ordering.
for (unsigned i = 0, n = VRs.size(); i < n; ++i)
RO.insert(std::make_pair(VRs[i], i));
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 0e33976a58ac..239cf49ca8a2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -1011,10 +1011,9 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
/// the use of the hardware loop instruction.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
bool IsInnerHWLoop) const {
- const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
- LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0]));
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Blocks[i];
+ LLVM_DEBUG(dbgs() << "\nhw_loop head, "
+ << printMBBReference(**L->block_begin()));
+ for (MachineBasicBlock *MBB : L->getBlocks()) {
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
@@ -1368,11 +1367,10 @@ bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
const MachineOperand *MO,
LoopFeederMap &LoopFeederPhi) const {
if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
- const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
- LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0]));
+ LLVM_DEBUG(dbgs() << "\nhw_loop head, "
+ << printMBBReference(**L->block_begin()));
// Ignore all BBs that form Loop.
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock *MBB : L->getBlocks()) {
if (A == MBB)
return false;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index efb4c2eb0fc3..470b05bda4c6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -127,8 +127,7 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
}
SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
+ MachineMemOperand *MemOp = LD->getMemOperand();
auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
-> MachineSDNode* {
@@ -159,7 +158,7 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
MVT::i32, MVT::Other, Base,
IncV, Chain);
- L->setMemRefs(MemOp, MemOp+1);
+ CurDAG->setNodeMemRefs(L, {MemOp});
To[1] = SDValue(L, 1); // Next address.
To[2] = SDValue(L, 2); // Chain.
// Handle special case for extension to i64.
@@ -170,7 +169,7 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
Base, Zero, Chain);
- L->setMemRefs(MemOp, MemOp+1);
+ CurDAG->setNodeMemRefs(L, {MemOp});
To[2] = SDValue(L, 1); // Chain.
MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
Base, IncV);
@@ -344,9 +343,8 @@ bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) {
FLI->second, dl, RTys,
{IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
- Res->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
+ CurDAG->setNodeMemRefs(Res, {MemOp});
ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
@@ -525,8 +523,7 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
}
SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = ST->getMemOperand();
+ MachineMemOperand *MemOp = ST->getMemOperand();
// Next address Chain
SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
@@ -537,14 +534,14 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
SDValue Ops[] = { Base, IncV, Value, Chain };
MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
Ops);
- S->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(S, {MemOp});
To[0] = SDValue(S, 0);
To[1] = SDValue(S, 1);
} else {
SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDValue Ops[] = { Base, Zero, Value, Chain };
MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
- S->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(S, {MemOp});
To[1] = SDValue(S, 0);
MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
Base, IncV);
@@ -1550,6 +1547,7 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
return true;
}
}
+ break;
}
default:
break;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 8aef9b4560d5..b796e442d4fa 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -120,7 +120,7 @@ struct Coloring {
return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
}
- void dump() const;
+ LLVM_DUMP_METHOD void dump() const;
private:
ArrayRef<Node> Order;
@@ -267,7 +267,7 @@ bool Coloring::color() {
return true;
}
-LLVM_DUMP_METHOD
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Coloring::dump() const {
dbgs() << "{ Order: {";
for (unsigned I = 0; I != Order.size(); ++I) {
@@ -309,6 +309,7 @@ void Coloring::dump() const {
dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
dbgs() << " }\n}\n";
}
+#endif
namespace {
// Base class for reordering networks. They don't strictly need to be
@@ -651,6 +652,7 @@ struct OpRef {
IndexBits = 28,
};
+ LLVM_DUMP_METHOD
void print(raw_ostream &OS, const SelectionDAG &G) const;
private:
@@ -663,7 +665,7 @@ struct NodeTemplate {
MVT Ty = MVT::Other;
std::vector<OpRef> Ops;
- void print(raw_ostream &OS, const SelectionDAG &G) const;
+ LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
};
struct ResultStack {
@@ -699,10 +701,12 @@ struct ResultStack {
BaseType List;
+ LLVM_DUMP_METHOD
void print(raw_ostream &OS, const SelectionDAG &G) const;
};
} // namespace
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
if (isValue()) {
OpV.getNode()->print(OS, &G);
@@ -752,6 +756,7 @@ void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
OS << '\n';
}
}
+#endif
namespace {
struct ShuffleMask {
@@ -1327,6 +1332,32 @@ OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
return vmuxp(Bytes, L, R, Results);
}
+namespace {
+ struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
+ template <typename T>
+ Deleter(SelectionDAG &D, T &C)
+ : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
+ C.erase(N);
+ }) {}
+ };
+
+ template <typename T>
+ struct NullifyingVector : public T {
+ DenseMap<SDNode*, SDNode**> Refs;
+ NullifyingVector(T &&V) : T(V) {
+ for (unsigned i = 0, e = T::size(); i != e; ++i) {
+ SDNode *&N = T::operator[](i);
+ Refs[N] = &N;
+ }
+ }
+ void erase(SDNode *N) {
+ auto F = Refs.find(N);
+ if (F != Refs.end())
+ *F->second = nullptr;
+ }
+ };
+}
+
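The Deleter and NullifyingVector helpers added above exist to keep side containers consistent while the DAG deletes nodes out from under them. A minimal plain-C++ sketch of the same listener idea, assuming a toy Graph with deletion callbacks rather than SelectionDAG's DAGNodeDeletedListener:

    #include <functional>
    #include <set>
    #include <vector>

    struct Node { int id; };

    struct Graph {
      std::vector<std::function<void(Node *)>> DeleteListeners;
      void notifyDeleted(Node *N) {
        for (auto &F : DeleteListeners)
          F(N);
      }
    };

    // Registers an erase-on-delete callback for any container with erase(Node*).
    // A real RAII listener would also deregister itself in its destructor.
    struct ScopedDeleter {
      template <typename C>
      ScopedDeleter(Graph &G, C &Container) {
        G.DeleteListeners.push_back(
            [&Container](Node *N) { Container.erase(N); });
      }
    };

    int main() {
      Graph G;
      Node A{0}, B{1};
      std::set<Node *> Live{&A, &B};
      ScopedDeleter D(G, Live);  // keeps Live in sync with deletions
      G.notifyDeleted(&A);       // Live now holds only &B
      return Live.count(&A);     // 0
    }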
bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
MVT ResTy, SDValue Va, SDValue Vb,
SDNode *N) {
@@ -1337,10 +1368,30 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
bool HavePairs = (2*HwLen == VecLen);
MVT SingleTy = getSingleVT(MVT::i8);
+ // The prior attempts to handle this shuffle may have left a bunch of
+ // dead nodes in the DAG (such as constants). These nodes will be added
+ // at the end of DAG's node list, which at that point had already been
+ // sorted topologically. In the main selection loop, the node list is
+ // traversed backwards from the root node, which means that any new
+ // nodes (from the end of the list) will not be visited.
+ // Scalarization will replace the shuffle node with the scalarized
+ // expression, and if that expression reused any of the leftover (dead)
+ // nodes, these nodes would not be selected (since the "local" selection
+ // only visits nodes that are not in AllNodes).
+ // To avoid this issue, remove all dead nodes from the DAG now.
+ DAG.RemoveDeadNodes();
+ DenseSet<SDNode*> AllNodes;
+ for (SDNode &S : DAG.allnodes())
+ AllNodes.insert(&S);
+
+ Deleter DUA(DAG, AllNodes);
+
SmallVector<SDValue,128> Ops;
+ LLVMContext &Ctx = *DAG.getContext();
+ MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
for (int I : Mask) {
if (I < 0) {
- Ops.push_back(ISel.selectUndef(dl, ElemTy));
+ Ops.push_back(ISel.selectUndef(dl, LegalTy));
continue;
}
SDValue Vec;
@@ -1360,7 +1411,7 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
}
}
SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemTy, {Vec, Idx});
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
SDValue L = Lower.LowerOperation(Ex, DAG);
assert(L.getNode());
Ops.push_back(L);
@@ -1384,32 +1435,55 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
assert(!N->use_empty());
ISel.ReplaceNode(N, LV.getNode());
- DAG.RemoveDeadNodes();
- std::deque<SDNode*> SubNodes;
- SubNodes.push_back(LV.getNode());
+ if (AllNodes.count(LV.getNode())) {
+ DAG.RemoveDeadNodes();
+ return true;
+ }
+
+ // The lowered build-vector node will now need to be selected. It needs
+ // to be done here because this node and its subnodes are not included
+ // in the main selection loop.
+ // Implement essentially the same topological ordering algorithm as is
+ // used in SelectionDAGISel.
+
+ SetVector<SDNode*> SubNodes, TmpQ;
+ std::map<SDNode*,unsigned> NumOps;
+
+ SubNodes.insert(LV.getNode());
for (unsigned I = 0; I != SubNodes.size(); ++I) {
- for (SDValue Op : SubNodes[I]->ops())
- SubNodes.push_back(Op.getNode());
+ unsigned OpN = 0;
+ SDNode *S = SubNodes[I];
+ for (SDValue Op : S->ops()) {
+ if (AllNodes.count(Op.getNode()))
+ continue;
+ SubNodes.insert(Op.getNode());
+ ++OpN;
+ }
+ NumOps.insert({S, OpN});
+ if (OpN == 0)
+ TmpQ.insert(S);
}
- while (!SubNodes.empty()) {
- SDNode *S = SubNodes.front();
- SubNodes.pop_front();
- if (S->use_empty())
- continue;
- // This isn't great, but users need to be selected before any nodes that
- // they use. (The reason is to match larger patterns, and avoid nodes that
- // cannot be matched on their own, e.g. ValueType, TokenFactor, etc.).
- bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) {
- return llvm::any_of(SubNodes, [U](const SDNode *T) {
- return T == U;
- });
- });
- if (PendingUser)
- SubNodes.push_back(S);
- else
- ISel.Select(S);
+
+ for (unsigned I = 0; I != TmpQ.size(); ++I) {
+ SDNode *S = TmpQ[I];
+ for (SDNode *U : S->uses()) {
+ if (!SubNodes.count(U))
+ continue;
+ auto F = NumOps.find(U);
+ assert(F != NumOps.end());
+ assert(F->second > 0);
+ if (!--F->second)
+ TmpQ.insert(F->first);
+ }
}
+ assert(SubNodes.size() == TmpQ.size());
+ NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
+
+ Deleter DUQ(DAG, Queue);
+ for (SDNode *S : reverse(Queue))
+ if (S != nullptr)
+ ISel.Select(S);
DAG.RemoveDeadNodes();
return true;
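The replacement loop above is essentially Kahn's algorithm: count the unresolved operands of each node, seed a queue with the operand-free nodes, and decrement each user's count as nodes are emitted; walking the result in reverse then selects users before their operands. A self-contained sketch of that ordering on a toy dependence graph (plain std containers, not SelectionDAG):

    #include <cassert>
    #include <map>
    #include <vector>

    // Edges[n] lists the operands (dependencies) of node n.
    std::vector<int> topoOrder(const std::map<int, std::vector<int>> &Edges) {
      std::map<int, unsigned> NumOps;          // unresolved operands per node
      std::map<int, std::vector<int>> Users;
      for (auto &P : Edges) {
        NumOps[P.first] = P.second.size();
        for (int Op : P.second)
          Users[Op].push_back(P.first);
      }
      std::vector<int> Queue;                  // operand-free nodes go first
      for (auto &P : NumOps)
        if (P.second == 0)
          Queue.push_back(P.first);
      for (unsigned I = 0; I != Queue.size(); ++I)
        for (int U : Users[Queue[I]])
          if (--NumOps[U] == 0)
            Queue.push_back(U);
      assert(Queue.size() == NumOps.size() && "cycle detected");
      return Queue;                            // reverse(Queue): users first
    }

    int main() {
      // Node 2 uses 0 and 1; node 1 uses 0.
      std::map<int, std::vector<int>> Edges{{0, {}}, {1, {0}}, {2, {0, 1}}};
      std::vector<int> Order = topoOrder(Edges);
      return Order.back() == 2 ? 0 : 1;        // the root comes out last
    }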
@@ -2048,10 +2122,6 @@ void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
}
void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
- if (!HST->usePackets()) {
- report_fatal_error("Support for gather requires packets, "
- "which are disabled");
- }
const SDLoc &dl(N);
SDValue Chain = N->getOperand(0);
SDValue Address = N->getOperand(2);
@@ -2083,18 +2153,13 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain };
SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
ReplaceNode(N, Result);
}
void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
- if (!HST->usePackets()) {
- report_fatal_error("Support for gather requires packets, "
- "which are disabled");
- }
const SDLoc &dl(N);
SDValue Chain = N->getOperand(0);
SDValue Address = N->getOperand(2);
@@ -2125,9 +2190,8 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
SDValue Ops[] = { Address, Base, Modifier, Offset, Chain };
SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
ReplaceNode(N, Result);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 604d84994b6c..1edf3e498dfa 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -151,16 +152,6 @@ static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
#include "HexagonGenCallingConv.inc"
-void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
- if (VT != PromotedLdStVT) {
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType(ISD::LOAD, VT, PromotedLdStVT);
-
- setOperationAction(ISD::STORE, VT, Promote);
- AddPromotedToType(ISD::STORE, VT, PromotedLdStVT);
- }
-}
-
SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
const {
@@ -250,6 +241,18 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return true;
}
+unsigned HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
+ // Just support r19, the linux kernel uses it.
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("r19", Hexagon::R19)
+ .Default(0);
+ if (Reg)
+ return Reg;
+
+ report_fatal_error("Invalid register name global variable");
+}
+
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
@@ -1225,7 +1228,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const HexagonSubtarget &ST)
: TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
Subtarget(ST) {
- bool IsV4 = !Subtarget.hasV5Ops();
auto &HRI = *Subtarget.getRegisterInfo();
setPrefLoopAlignment(4);
@@ -1267,10 +1269,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
- if (Subtarget.hasV5Ops()) {
- addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
- }
+ addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
//
// Handling of scalar operations.
@@ -1284,21 +1284,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// which default to "expand" for at least one type.
// Misc operations.
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
-
- setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
- setOperationAction(ISD::JumpTable, MVT::i32, Custom);
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
- setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
- setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
- setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
- setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom legalize GlobalAddress nodes into CONST32.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
@@ -1348,8 +1348,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ, MVT::i8, Promote);
setOperationAction(ISD::CTTZ, MVT::i16, Promote);
- // In V5, popcount can count # of 1s in i64 but returns i32.
- // On V4 it will be expanded (set later).
+ // Popcount can count # of 1s in i64 but returns i32.
setOperationAction(ISD::CTPOP, MVT::i8, Promote);
setOperationAction(ISD::CTPOP, MVT::i16, Promote);
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
@@ -1360,6 +1359,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
setOperationAction(ISD::BSWAP, MVT::i64, Legal);
+ setOperationAction(ISD::FSHL, MVT::i32, Legal);
+ setOperationAction(ISD::FSHL, MVT::i64, Legal);
+ setOperationAction(ISD::FSHR, MVT::i32, Legal);
+ setOperationAction(ISD::FSHR, MVT::i64, Legal);
+
for (unsigned IntExpOp :
{ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
@@ -1403,12 +1407,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Handling of vector operations.
//
- promoteLdStType(MVT::v4i8, MVT::i32);
- promoteLdStType(MVT::v2i16, MVT::i32);
- promoteLdStType(MVT::v8i8, MVT::i64);
- promoteLdStType(MVT::v4i16, MVT::i64);
- promoteLdStType(MVT::v2i32, MVT::i64);
-
// Set the action for vector operations to "expand", then override it with
// either "custom" or "legal" for specific cases.
static const unsigned VectExpOps[] = {
@@ -1488,9 +1486,13 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
}
// Custom lower unaligned loads.
- for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
- MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
- setOperationAction(ISD::LOAD, VecVT, Custom);
+ // Also, for both loads and stores, verify the alignment of the address
+ // in case it is a compile-time constant. This is a usability feature to
+ // provide a meaningful error message to users.
+ for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
+ MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
}
for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) {
@@ -1508,63 +1510,27 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
- // Subtarget-specific operation actions.
- //
- if (Subtarget.hasV60Ops()) {
- setOperationAction(ISD::ROTL, MVT::i32, Custom);
- setOperationAction(ISD::ROTL, MVT::i64, Custom);
- }
- if (Subtarget.hasV5Ops()) {
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FADD, MVT::f64, Expand);
- setOperationAction(ISD::FSUB, MVT::f64, Expand);
- setOperationAction(ISD::FMUL, MVT::f64, Expand);
-
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- } else { // V4
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
- setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i8, Expand);
- setOperationAction(ISD::CTPOP, MVT::i16, Expand);
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
-
- // Expand these operations for both f32 and f64:
- for (unsigned FPExpOpV4 :
- {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
- setOperationAction(FPExpOpV4, MVT::f32, Expand);
- setOperationAction(FPExpOpV4, MVT::f64, Expand);
- }
-
- for (ISD::CondCode FPExpCCV4 :
- {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
- ISD::SETUO, ISD::SETO}) {
- setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
- setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
- }
- }
+ // V5+.
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
// Handling of indexed loads/stores: default is "expand".
//
@@ -1574,6 +1540,19 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setIndexedStoreAction(ISD::POST_INC, VT, Legal);
}
+ // Subtarget-specific operation actions.
+ //
+ if (Subtarget.hasV60Ops()) {
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTL, MVT::i64, Legal);
+ setOperationAction(ISD::ROTR, MVT::i32, Legal);
+ setOperationAction(ISD::ROTR, MVT::i64, Legal);
+ }
+ if (Subtarget.hasV66Ops()) {
+ setOperationAction(ISD::FADD, MVT::f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::f64, Legal);
+ }
+
if (Subtarget.useHVXOps())
initializeHVXLowering();
@@ -1600,42 +1579,18 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
- if (IsV4) {
- // Handle single-precision floating point operations on V4.
- if (FastMath) {
- setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
- setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
- setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
- setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
- setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
- // Double-precision compares.
- setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
- setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
- } else {
- setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
- setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
- setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
- setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
- setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
- // Double-precision compares.
- setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
- setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
- }
- }
-
// This is the only fast library function for sqrtd.
if (FastMath)
setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
// Prefix is: nothing for "slow-math",
- // "fast2_" for V4 fast-math and V5+ fast-math double-precision
+ // "fast2_" for V5+ fast-math double-precision
// (actually, keep fast-math and fast-math2 separate for now)
if (FastMath) {
setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
- // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok).
setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
} else {
setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
@@ -1645,44 +1600,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
}
- if (Subtarget.hasV5Ops()) {
- if (FastMath)
- setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
- else
- setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
- } else {
- // V4
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
- setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
- setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
- setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
- setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
- setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
- setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
- setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
- setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
- setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
- setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
- setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
- setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
- setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
- }
+ if (FastMath)
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
+ else
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
// These cause problems when the shift amount is non-constant.
setLibcallName(RTLIB::SHL_I128, nullptr);
@@ -1738,6 +1659,26 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
+void
+HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
+ unsigned NeedAlign) const {
+ auto *CA = dyn_cast<ConstantSDNode>(Ptr);
+ if (!CA)
+ return;
+ unsigned Addr = CA->getZExtValue();
+ unsigned HaveAlign = Addr != 0 ? 1u << countTrailingZeros(Addr) : NeedAlign;
+ if (HaveAlign < NeedAlign) {
+ std::string ErrMsg;
+ raw_string_ostream O(ErrMsg);
+ O << "Misaligned constant address: " << format_hex(Addr, 10)
+ << " has alignment " << HaveAlign
+ << ", but the memory access requires " << NeedAlign;
+ if (DebugLoc DL = dl.getDebugLoc())
+ DL.print(O << ", at ");
+ report_fatal_error(O.str());
+ }
+}
+
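The alignment test in validateConstPtrAlignment boils down to "the provable alignment of a constant address is one shifted left by its trailing-zero count". A standalone sketch of that check (a hand-rolled loop in place of llvm::countTrailingZeros; the address 0x1002 is just an illustrative value):

    #include <cstdint>
    #include <cstdio>

    // Alignment provable from a constant address; 0 is treated as aligned
    // for any power of two, matching the lowering above.
    static unsigned provableAlign(uint32_t Addr, unsigned NeedAlign) {
      if (Addr == 0)
        return NeedAlign;
      unsigned TZ = 0;
      while ((Addr & 1u) == 0) {  // countTrailingZeros
        Addr >>= 1;
        ++TZ;
      }
      return 1u << TZ;
    }

    int main() {
      // 0x1002 has one trailing zero bit -> alignment 2; a 4-byte access
      // would trigger the fatal error in the lowering above.
      unsigned Have = provableAlign(0x1002, 4u);
      std::printf("have %u, need %u -> %s\n", Have, 4u,
                  Have < 4u ? "misaligned" : "ok");
      return 0;
    }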
// Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
// intrinsic.
static bool isBrevLdIntrinsic(const Value *Inst) {
@@ -1834,11 +1775,8 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// The intrinsic function call is of the form { ElTy, i8* }
// @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
// should be derived from ElTy.
- PointerType *PtrTy = I.getCalledFunction()
- ->getReturnType()
- ->getContainedType(0)
- ->getPointerTo();
- Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
+ Info.memVT = MVT::getVT(ElTy);
llvm::Value *BasePtrVal = I.getOperand(0);
Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
// The offset value comes through Modifier register. For now, assume the
@@ -1904,12 +1842,12 @@ bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
}
TargetLoweringBase::LegalizeTypeAction
-HexagonTargetLowering::getPreferredVectorAction(EVT VT) const {
+HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT.getVectorNumElements() == 1)
return TargetLoweringBase::TypeScalarizeVector;
// Always widen vectors of i1.
- MVT ElemTy = VT.getSimpleVT().getVectorElementType();
+ MVT ElemTy = VT.getVectorElementType();
if (ElemTy == MVT::i1)
return TargetLoweringBase::TypeWidenVector;
@@ -2341,8 +2279,9 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
// Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
// position 0.
assert(ty(IdxV) == MVT::i32);
+ unsigned VecRep = 8 / VecWidth;
SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
- DAG.getConstant(8*Scale, dl, MVT::i32));
+ DAG.getConstant(8*VecRep, dl, MVT::i32));
SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
while (Scale > 1) {
@@ -2643,12 +2582,37 @@ HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
}
SDValue
+HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ unsigned ClaimAlign = LN->getAlignment();
+ validateConstPtrAlignment(LN->getBasePtr(), SDLoc(Op), ClaimAlign);
+ // Call LowerUnalignedLoad for all loads; it recognizes loads that
+ // don't need extra aligning.
+ return LowerUnalignedLoad(Op, DAG);
+}
+
+SDValue
+HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+ unsigned ClaimAlign = SN->getAlignment();
+ SDValue Ptr = SN->getBasePtr();
+ const SDLoc &dl(Op);
+ validateConstPtrAlignment(Ptr, dl, ClaimAlign);
+
+ MVT StoreTy = SN->getMemoryVT().getSimpleVT();
+ unsigned NeedAlign = Subtarget.getTypeAlignment(StoreTy);
+ if (ClaimAlign < NeedAlign)
+ return expandUnalignedStore(SN, DAG);
+ return Op;
+}
+
+SDValue
HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
const {
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
- unsigned HaveAlign = LN->getAlignment();
MVT LoadTy = ty(Op);
unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
+ unsigned HaveAlign = LN->getAlignment();
if (HaveAlign >= NeedAlign)
return Op;
@@ -2802,7 +2766,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::BITCAST: return LowerBITCAST(Op, DAG);
- case ISD::LOAD: return LowerUnalignedLoad(Op, DAG);
+ case ISD::LOAD: return LowerLoad(Op, DAG);
+ case ISD::STORE: return LowerStore(Op, DAG);
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerAddSubCarry(Op, DAG);
case ISD::SRA:
@@ -2834,6 +2799,19 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
void
+HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ // We are only custom-lowering stores to verify the alignment of the
+ // address if it is a compile-time constant. Since a store can be modified
+ // during type-legalization (the value being stored may need legalization),
+ // return empty Results here to indicate that we don't really make any
+ // changes in the custom lowering.
+ if (N->getOpcode() != ISD::STORE)
+ return TargetLowering::LowerOperationWrapper(N, Results, DAG);
+}
+
+void
HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -2946,7 +2924,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- return Subtarget.hasV5Ops();
+ return true;
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
@@ -3110,6 +3088,25 @@ HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
return TargetLowering::findRepresentativeClass(TRI, VT);
}
+bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
+ ISD::LoadExtType ExtTy, EVT NewVT) const {
+ // TODO: This may be worth removing. Check regression tests for diffs.
+ if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
+ return false;
+
+ auto *L = cast<LoadSDNode>(Load);
+ std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
+ // Small-data object, do not shrink.
+ if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
+ return false;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
+ auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
+ const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
+ return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
+ }
+ return true;
+}
+
Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
BasicBlock *BB = Builder.GetInsertBlock();
@@ -3154,9 +3151,12 @@ bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
}
-bool HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
- AtomicCmpXchgInst *AI) const {
+TargetLowering::AtomicExpansionKind
+HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *AI) const {
const DataLayout &DL = AI->getModule()->getDataLayout();
unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
- return Size >= 4 && Size <= 8;
+ if (Size >= 4 && Size <= 8)
+ return AtomicExpansionKind::LLSC;
+ return AtomicExpansionKind::None;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 3d94bd1ff6ed..265c37e6ae61 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -101,7 +101,6 @@ namespace HexagonISD {
bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize)
const;
- void promoteLdStType(MVT VT, MVT PromotedLdStVT);
public:
explicit HexagonTargetLowering(const TargetMachine &TM,
@@ -142,10 +141,12 @@ namespace HexagonISD {
unsigned DefinedValues) const override;
bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
@@ -164,6 +165,8 @@ namespace HexagonISD {
SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const;
@@ -220,6 +223,9 @@ namespace HexagonISD {
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
+
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
unsigned
@@ -298,6 +304,9 @@ namespace HexagonISD {
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
const override;
+ bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
+ EVT NewVT) const override;
+
// Handling of atomic RMW instructions.
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
@@ -305,7 +314,8 @@ namespace HexagonISD {
Value *Addr, AtomicOrdering Ord) const override;
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+ AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
@@ -314,6 +324,9 @@ namespace HexagonISD {
private:
void initializeHVXLowering();
+ void validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
+ unsigned NeedAlign) const;
+
std::pair<SDValue,int> getBaseAndOffset(SDValue Addr) const;
bool getBuildVectorConstInts(ArrayRef<SDValue> Values, MVT VecTy,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 2566194ca9c6..a6400b5d8266 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -400,6 +400,76 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
MachinePointerInfo::getConstantPool(MF), Align);
}
+ // A special case is a situation where the vector is built entirely from
+ // elements extracted from another vector. This could be done via a shuffle
+ // more efficiently, but typically, the size of the source vector will not
+ // match the size of the vector being built (which precludes the use of a
+ // shuffle directly).
+ // This only handles a single source vector, and the vector being built
+ // should be of a sub-vector type of the source vector type.
+ auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
+ SmallVectorImpl<int> &SrcIdx) {
+ SDValue Vec;
+ for (SDValue V : Values) {
+ if (isUndef(V)) {
+ SrcIdx.push_back(-1);
+ continue;
+ }
+ if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+ // All extracts should come from the same vector.
+ SDValue T = V.getOperand(0);
+ if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
+ return false;
+ Vec = T;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (C == nullptr)
+ return false;
+ int I = C->getSExtValue();
+ assert(I >= 0 && "Negative element index");
+ SrcIdx.push_back(I);
+ }
+ SrcVec = Vec;
+ return true;
+ };
+
+ SmallVector<int,128> ExtIdx;
+ SDValue ExtVec;
+ if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
+ MVT ExtTy = ty(ExtVec);
+ unsigned ExtLen = ExtTy.getVectorNumElements();
+ if (ExtLen == VecLen || ExtLen == 2*VecLen) {
+ // Construct a new shuffle mask that will produce a vector with the same
+ // number of elements as the input vector, and such that the vector we
+ // want will be the initial subvector of it.
+ SmallVector<int,128> Mask;
+ BitVector Used(ExtLen);
+
+ for (int M : ExtIdx) {
+ Mask.push_back(M);
+ if (M >= 0)
+ Used.set(M);
+ }
+ // Fill the rest of the mask with the unused elements of ExtVec in hopes
+ // that it will result in a permutation of ExtVec's elements. It's still
+ // fine if it doesn't (e.g. if undefs are present, or elements are
+ // repeated), but permutations can always be done efficiently via vdelta
+ // and vrdelta.
+ for (unsigned I = 0; I != ExtLen; ++I) {
+ if (Mask.size() == ExtLen)
+ break;
+ if (!Used.test(I))
+ Mask.push_back(I);
+ }
+
+ SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
+ DAG.getUNDEF(ExtTy), Mask);
+ if (ExtLen == VecLen)
+ return S;
+ return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S);
+ }
+ }
+
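The mask construction above can be read in isolation: copy the extract indices (with -1 for undef lanes), then append the source lanes that were not used until the mask covers the whole source vector, which yields a permutation whenever no lane repeats. A standalone sketch with plain std::vector (ExtLen = 8 and the indices are illustrative):

    #include <vector>

    std::vector<int> buildMask(const std::vector<int> &ExtIdx, unsigned ExtLen) {
      std::vector<int> Mask;
      std::vector<bool> Used(ExtLen, false);
      for (int M : ExtIdx) {
        Mask.push_back(M);                 // -1 marks an undef element
        if (M >= 0)
          Used[M] = true;
      }
      // Pad with unused source lanes until the mask length matches ExtLen.
      for (unsigned I = 0; I != ExtLen && Mask.size() != ExtLen; ++I)
        if (!Used[I])
          Mask.push_back(I);
      return Mask;
    }

    int main() {
      // Lanes {3, 1, undef, 0} of an 8-lane source -> {3, 1, -1, 0, 2, 4, 5, 6}.
      std::vector<int> Mask = buildMask({3, 1, -1, 0}, 8);
      return Mask.size() == 8 ? 0 : 1;
    }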
// Construct two halves in parallel, then or them together.
assert(4*Words.size() == Subtarget.getVectorLength());
SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
@@ -1356,7 +1426,8 @@ SDValue
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
// Sign- and zero-extends are legal.
assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
- return DAG.getZeroExtendVectorInReg(Op.getOperand(0), SDLoc(Op), ty(Op));
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
+ Op.getOperand(0));
}
SDValue
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index 1bb3bc1ea31b..2236140d5dd7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -69,101 +69,101 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// Instruction type according to the ISA.
IType Type = type;
- let TSFlags{5-0} = Type.Value;
+ let TSFlags{6-0} = Type.Value;
// Solo instructions, i.e., those that cannot be in a packet with others.
bits<1> isSolo = 0;
- let TSFlags{6} = isSolo;
+ let TSFlags{7} = isSolo;
// Packed only with A or X-type instructions.
bits<1> isSoloAX = 0;
- let TSFlags{7} = isSoloAX;
+ let TSFlags{8} = isSoloAX;
// Restricts slot 1 to ALU-only instructions.
bits<1> isRestrictSlot1AOK = 0;
- let TSFlags{8} = isRestrictSlot1AOK;
+ let TSFlags{9} = isRestrictSlot1AOK;
// Predicated instructions.
bits<1> isPredicated = 0;
- let TSFlags{9} = isPredicated;
+ let TSFlags{10} = isPredicated;
bits<1> isPredicatedFalse = 0;
- let TSFlags{10} = isPredicatedFalse;
+ let TSFlags{11} = isPredicatedFalse;
bits<1> isPredicatedNew = 0;
- let TSFlags{11} = isPredicatedNew;
+ let TSFlags{12} = isPredicatedNew;
bits<1> isPredicateLate = 0;
- let TSFlags{12} = isPredicateLate; // Late predicate producer insn.
+ let TSFlags{13} = isPredicateLate; // Late predicate producer insn.
// New-value insn helper fields.
bits<1> isNewValue = 0;
- let TSFlags{13} = isNewValue; // New-value consumer insn.
+ let TSFlags{14} = isNewValue; // New-value consumer insn.
bits<1> hasNewValue = 0;
- let TSFlags{14} = hasNewValue; // New-value producer insn.
+ let TSFlags{15} = hasNewValue; // New-value producer insn.
bits<3> opNewValue = 0;
- let TSFlags{17-15} = opNewValue; // New-value produced operand.
+ let TSFlags{18-16} = opNewValue; // New-value produced operand.
bits<1> isNVStorable = 0;
- let TSFlags{18} = isNVStorable; // Store that can become new-value store.
+ let TSFlags{19} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{19} = isNVStore; // New-value store insn.
+ let TSFlags{20} = isNVStore; // New-value store insn.
bits<1> isCVLoadable = 0;
- let TSFlags{20} = isCVLoadable; // Load that can become cur-value load.
+ let TSFlags{21} = isCVLoadable; // Load that can become cur-value load.
bits<1> isCVLoad = 0;
- let TSFlags{21} = isCVLoad; // Cur-value load insn.
+ let TSFlags{22} = isCVLoad; // Cur-value load insn.
// Immediate extender helper fields.
bits<1> isExtendable = 0;
- let TSFlags{22} = isExtendable; // Insn may be extended.
+ let TSFlags{23} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0;
- let TSFlags{23} = isExtended; // Insn must be extended.
+ let TSFlags{24} = isExtended; // Insn must be extended.
bits<3> opExtendable = 0;
- let TSFlags{26-24} = opExtendable; // Which operand may be extended.
+ let TSFlags{27-25} = opExtendable; // Which operand may be extended.
bits<1> isExtentSigned = 0;
- let TSFlags{27} = isExtentSigned; // Signed or unsigned range.
+ let TSFlags{28} = isExtentSigned; // Signed or unsigned range.
bits<5> opExtentBits = 0;
- let TSFlags{32-28} = opExtentBits; //Number of bits of range before extending.
+ let TSFlags{33-29} = opExtentBits; //Number of bits of range before extending.
bits<2> opExtentAlign = 0;
- let TSFlags{34-33} = opExtentAlign; // Alignment exponent before extending.
+ let TSFlags{35-34} = opExtentAlign; // Alignment exponent before extending.
bit cofMax1 = 0;
- let TSFlags{35} = cofMax1;
+ let TSFlags{36} = cofMax1;
bit cofRelax1 = 0;
- let TSFlags{36} = cofRelax1;
+ let TSFlags{37} = cofRelax1;
bit cofRelax2 = 0;
- let TSFlags{37} = cofRelax2;
+ let TSFlags{38} = cofRelax2;
bit isRestrictNoSlot1Store = 0;
- let TSFlags{38} = isRestrictNoSlot1Store;
+ let TSFlags{39} = isRestrictNoSlot1Store;
// Addressing mode for load/store instructions.
AddrModeType addrMode = NoAddrMode;
- let TSFlags{43-41} = addrMode.Value;
+ let TSFlags{44-42} = addrMode.Value;
// Memory access size for mem access instructions (load/store)
MemAccessSize accessSize = NoMemAccess;
- let TSFlags{47-44} = accessSize.Value;
+ let TSFlags{48-45} = accessSize.Value;
bits<1> isTaken = 0;
- let TSFlags {48} = isTaken; // Branch prediction.
+ let TSFlags {49} = isTaken; // Branch prediction.
bits<1> isFP = 0;
- let TSFlags {49} = isFP; // Floating-point.
+ let TSFlags {50} = isFP; // Floating-point.
bits<1> isSomeOK = 0;
- let TSFlags {50} = isSomeOK; // Relax some grouping constraints.
+ let TSFlags {51} = isSomeOK; // Relax some grouping constraints.
bits<1> hasNewValue2 = 0;
- let TSFlags{51} = hasNewValue2; // Second New-value producer insn.
+ let TSFlags{52} = hasNewValue2; // Second New-value producer insn.
bits<3> opNewValue2 = 0;
- let TSFlags{54-52} = opNewValue2; // Second New-value produced operand.
+ let TSFlags{55-53} = opNewValue2; // Second New-value produced operand.
bits<1> isAccumulator = 0;
- let TSFlags{55} = isAccumulator;
+ let TSFlags{56} = isAccumulator;
bits<1> prefersSlot3 = 0;
- let TSFlags{56} = prefersSlot3; // Complex XU
+ let TSFlags{57} = prefersSlot3; // Complex XU
bits<1> hasTmpDst = 0;
- let TSFlags{59} = hasTmpDst; // v65 : 'fake" register VTMP is set
+ let TSFlags{60} = hasTmpDst; // v65 : "fake" register VTMP is set
bit CVINew = 0;
- let TSFlags{61} = CVINew;
+ let TSFlags{62} = CVINew;
// Fields used for relation models.
bit isNonTemporal = 0;
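The bulk of this hunk is a one-bit shift of every field: widening Type from TSFlags{5-0} to TSFlags{6-0} moves each later field up by one position, and the decoder side reads a field as (TSFlags >> Pos) & mask. A hedged standalone illustration of why every field must move together (plain C++, not the actual HexagonBaseInfo.h constants):

    #include <cstdint>

    // Extract a Len-bit field starting at bit Pos of the TSFlags word.
    static uint64_t getField(uint64_t TSFlags, unsigned Pos, unsigned Len) {
      return (TSFlags >> Pos) & ((1ull << Len) - 1);
    }

    int main() {
      // With a 7-bit Type at {6-0}, isSolo now lives at bit 7 rather than bit 6.
      uint64_t TSFlags = (1ull << 7) | 0x45;        // isSolo set, Type == 0x45
      bool OK = getField(TSFlags, 0, 7) == 0x45 &&  // Type
                getField(TSFlags, 7, 1) == 1;       // isSolo
      return OK ? 0 : 1;
    }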
@@ -194,8 +194,6 @@ class HInst<dag outs, dag ins, string asmstr, InstrItinClass itin, IType type> :
// Instruction Classes Definitions +
//===----------------------------------------------------------------------===//
-// LD Instruction Class in V2/V3/V4.
-// Definition of the instruction class NOT CHANGED.
let mayLoad = 1 in
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
@@ -205,9 +203,6 @@ class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
-// ST Instruction Class in V2/V3 can take SLOT0 only.
-// ST Instruction Class in V4 can take SLOT0 & SLOT1.
-// Definition of the instruction class CHANGED from V2/V3 to V4.
let mayStore = 1 in
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
@@ -235,15 +230,6 @@ class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Instruction Classes Definitions -
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// V4 Instruction Format Definitions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrFormatsV4.td"
-
-//===----------------------------------------------------------------------===//
-// V60+ Instruction Format Definitions +
-//===----------------------------------------------------------------------===//
-
+include "HexagonInstrFormatsV5.td"
include "HexagonInstrFormatsV60.td"
include "HexagonInstrFormatsV65.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td
index c5fa25995212..c8de5cbcc1e0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td
@@ -1,4 +1,4 @@
-//==- HexagonInstrFormatsV4.td - Hexagon Instruction Formats --*- tablegen -==//
+//==- HexagonInstrFormatsV5.td - Hexagon Instruction Formats --*- tablegen -==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes the Hexagon V4 instruction classes in TableGen format.
+// This file describes the Hexagon V5 instruction classes in TableGen format.
//
//===----------------------------------------------------------------------===//
@@ -49,39 +49,39 @@ class InstDuplex<bits<4> iClass, list<dag> pattern = [],
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
- let TSFlags{5-0} = Type.Value;
+ let TSFlags{6-0} = Type.Value;
// Predicated instructions.
bits<1> isPredicated = 0;
- let TSFlags{6} = isPredicated;
+ let TSFlags{7} = isPredicated;
bits<1> isPredicatedFalse = 0;
- let TSFlags{7} = isPredicatedFalse;
+ let TSFlags{8} = isPredicatedFalse;
bits<1> isPredicatedNew = 0;
- let TSFlags{8} = isPredicatedNew;
+ let TSFlags{9} = isPredicatedNew;
// New-value insn helper fields.
bits<1> isNewValue = 0;
- let TSFlags{9} = isNewValue; // New-value consumer insn.
+ let TSFlags{10} = isNewValue; // New-value consumer insn.
bits<1> hasNewValue = 0;
- let TSFlags{10} = hasNewValue; // New-value producer insn.
+ let TSFlags{11} = hasNewValue; // New-value producer insn.
bits<3> opNewValue = 0;
- let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ let TSFlags{14-12} = opNewValue; // New-value produced operand.
bits<1> isNVStorable = 0;
- let TSFlags{14} = isNVStorable; // Store that can become new-value store.
+ let TSFlags{15} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{15} = isNVStore; // New-value store insn.
+ let TSFlags{16} = isNVStore; // New-value store insn.
// Immediate extender helper fields.
bits<1> isExtendable = 0;
- let TSFlags{16} = isExtendable; // Insn may be extended.
+ let TSFlags{17} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0;
- let TSFlags{17} = isExtended; // Insn must be extended.
+ let TSFlags{18} = isExtended; // Insn must be extended.
bits<3> opExtendable = 0;
- let TSFlags{20-18} = opExtendable; // Which operand may be extended.
+ let TSFlags{21-19} = opExtendable; // Which operand may be extended.
bits<1> isExtentSigned = 0;
- let TSFlags{21} = isExtentSigned; // Signed or unsigned range.
+ let TSFlags{22} = isExtentSigned; // Signed or unsigned range.
bits<5> opExtentBits = 0;
- let TSFlags{26-22} = opExtentBits; //Number of bits of range before extending.
+ let TSFlags{27-23} = opExtentBits; //Number of bits of range before extending.
bits<2> opExtentAlign = 0;
- let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending.
+ let TSFlags{29-28} = opExtentAlign; // Alignment exponent before extending.
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 6019c7c5d024..de0d6c4d9e4e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -335,37 +335,37 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
/// This function checks if the instruction or bundle of instructions
/// has load from stack slot and returns frameindex and machine memory
/// operand of that instruction if true.
-bool HexagonInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool HexagonInstrInfo::hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
if (MI.isBundle()) {
const MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
- if (TargetInstrInfo::hasLoadFromStackSlot(*MII, MMO, FrameIndex))
+ if (TargetInstrInfo::hasLoadFromStackSlot(*MII, Accesses))
return true;
return false;
}
- return TargetInstrInfo::hasLoadFromStackSlot(MI, MMO, FrameIndex);
+ return TargetInstrInfo::hasLoadFromStackSlot(MI, Accesses);
}
/// This function checks if the instruction or bundle of instructions
/// has store to stack slot and returns frameindex and machine memory
/// operand of that instruction if true.
-bool HexagonInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool HexagonInstrInfo::hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
if (MI.isBundle()) {
const MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
- if (TargetInstrInfo::hasStoreToStackSlot(*MII, MMO, FrameIndex))
+ if (TargetInstrInfo::hasStoreToStackSlot(*MII, Accesses))
return true;
return false;
}
- return TargetInstrInfo::hasStoreToStackSlot(MI, MMO, FrameIndex);
+ return TargetInstrInfo::hasStoreToStackSlot(MI, Accesses);
}
/// This function can analyze one/two way branching only and should (mostly) be
@@ -1086,19 +1086,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned NewOpc = Aligned ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32Ub_ai;
unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass);
- MachineInstr *MI1New =
- BuildMI(MBB, MI, DL, get(NewOpc))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(SrcSubLo)
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(SrcSubLo)
+ .cloneMemRefs(MI);
MI1New->getOperand(0).setIsKill(false);
BuildMI(MBB, MI, DL, get(NewOpc))
.add(MI.getOperand(0))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(1).getImm() + Offset)
.addReg(SrcSubHi)
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1111,15 +1110,15 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc),
HRI.getSubReg(DstReg, Hexagon::vsub_lo))
- .add(MI.getOperand(1))
- .addImm(MI.getOperand(2).getImm())
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
+ .cloneMemRefs(MI);
MI1New->getOperand(1).setIsKill(false);
BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_hi))
.add(MI.getOperand(1))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(2).getImm() + Offset)
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1294,7 +1293,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine))
.add(Op0)
.addReg(PReg, S)
- .add(Op1)
.addReg(SrcHi)
.addReg(SrcLo);
if (IsDestLive)
@@ -1342,81 +1340,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(Hexagon::J2_jumprfnew));
return true;
- case Hexagon::V6_vgathermh_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermw_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermhw_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermhq_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermwq_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermhwq_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
case Hexagon::PS_loadrub_pci:
return RealCirc(Hexagon::L2_loadrub_pci, /*HasImm*/true, /*MxOp*/4);
case Hexagon::PS_loadrb_pci:
@@ -1466,6 +1389,93 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return false;
}
+MachineBasicBlock::instr_iterator
+HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const {
+ MachineBasicBlock &MBB = *MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned Opc = MI.getOpcode();
+ MachineBasicBlock::iterator First;
+
+ switch (Opc) {
+ case Hexagon::V6_vgathermh_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+ }
+
+ return MI.getIterator();
+}
+
// We indicate that we want to reverse the branch by
// inserting the reversed branching opcode.
bool HexagonInstrInfo::reverseBranchCondition(
@@ -2883,14 +2893,15 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
}
/// Get the base register and byte offset of a load/store instr.
-bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
- unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI)
- const {
+bool HexagonInstrInfo::getMemOperandWithOffset(
+ MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
unsigned AccessSize = 0;
- int OffsetVal = 0;
- BaseReg = getBaseAndOffset(LdSt, OffsetVal, AccessSize);
- Offset = OffsetVal;
- return BaseReg != 0;
+ BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
+ assert((!BaseOp || BaseOp->isReg()) &&
+ "getMemOperandWithOffset only supports base "
+ "operands of type register.");
+ return BaseOp != nullptr;
}
/// Can these instructions execute at the same time in a bundle.
@@ -3097,21 +3108,22 @@ unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const {
// Returns the base register in a memory access (load/store). The offset is
// returned in Offset and the access size is returned in AccessSize.
-// If the base register has a subregister or the offset field does not contain
-// an immediate value, return 0.
-unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
- int &Offset, unsigned &AccessSize) const {
+// If the base operand has a subregister or the offset field does not contain
+// an immediate value, return nullptr.
+MachineOperand *HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
+ int64_t &Offset,
+ unsigned &AccessSize) const {
// Return if it is not a base+offset type instruction or a MemOp.
if (getAddrMode(MI) != HexagonII::BaseImmOffset &&
getAddrMode(MI) != HexagonII::BaseLongOffset &&
!isMemOp(MI) && !isPostIncrement(MI))
- return 0;
+ return nullptr;
AccessSize = getMemAccessSize(MI);
unsigned BasePos = 0, OffsetPos = 0;
if (!getBaseAndOffsetPosition(MI, BasePos, OffsetPos))
- return 0;
+ return nullptr;
// Post increment updates its EA after the mem access,
// so we need to treat its offset as zero.
@@ -3120,14 +3132,14 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
} else {
const MachineOperand &OffsetOp = MI.getOperand(OffsetPos);
if (!OffsetOp.isImm())
- return 0;
+ return nullptr;
Offset = OffsetOp.getImm();
}
const MachineOperand &BaseOp = MI.getOperand(BasePos);
if (BaseOp.getSubReg() != 0)
- return 0;
- return BaseOp.getReg();
+ return nullptr;
+ return &const_cast<MachineOperand&>(BaseOp);
}
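A short sketch of the nullptr contract of getBaseAndOffset itself, again only illustrative: HII is an assumed pointer to const HexagonInstrInfo, and Offset/AccessSize are meaningful only when a base operand is actually returned.

int64_t Offset = 0;
unsigned AccessSize = 0;
if (MachineOperand *Base = HII->getBaseAndOffset(MI, Offset, AccessSize)) {
  unsigned BaseReg = Base->getReg(); // plain register; subregister bases yield nullptr
  (void)BaseReg;                     // e.g. feed BaseReg/Offset/AccessSize to an alias check
}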
/// Return the position of the base and offset operands for this instruction.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 96b4ffaba02f..9b840762e88a 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -69,16 +69,16 @@ public:
/// Check if the instruction or the bundle of instructions has
/// load from stack slots. Return the frameindex and machine memory operand
/// if true.
- bool hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const override;
+ bool hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const override;
/// Check if the instruction or the bundle of instructions has
/// store to stack slots. Return the frameindex and machine memory operand
/// if true.
- bool hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const override;
+ bool hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const override;
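A hedged caller-side sketch of the new Accesses-based interface (not code from this patch): the frame index is no longer an out-parameter and, under the usual LLVM conventions, can be recovered from each returned memory operand's pseudo source value. TII and MI plus the SmallVector, PseudoSourceValue, and Debug headers are assumed available.

SmallVector<const MachineMemOperand *, 1> Accesses;
if (TII->hasStoreToStackSlot(MI, Accesses)) {
  for (const MachineMemOperand *MMO : Accesses) {
    const auto *FS =
        dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
    if (FS)
      dbgs() << "stores to stack slot FI#" << FS->getFrameIndex() << '\n';
  }
}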
/// Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
@@ -216,9 +216,9 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
/// Get the base register and byte offset of a load/store instr.
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
/// Reverses the branch condition of the specified condition list,
/// returning false on success and true if it cannot be reversed.
@@ -436,8 +436,8 @@ public:
bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
unsigned getAddrMode(const MachineInstr &MI) const;
- unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset,
- unsigned &AccessSize) const;
+ MachineOperand *getBaseAndOffset(const MachineInstr &MI, int64_t &Offset,
+ unsigned &AccessSize) const;
SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const;
unsigned getCExtOpNum(const MachineInstr &MI) const;
HexagonII::CompoundGroup
@@ -472,6 +472,8 @@ public:
uint64_t getType(const MachineInstr &MI) const;
unsigned getUnits(const MachineInstr &MI) const;
+ MachineBasicBlock::instr_iterator expandVGatherPseudo(MachineInstr &MI) const;
+
/// getInstrTimingClassLatency - Compute the instruction latency of a given
/// instruction using Timing Class information, if available.
unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index b25e316709c5..9cab5748bef2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -6,726 +6,78 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-// This is populated based on the following specs:
-// Hexagon V2 Architecture
-// Application-Level Specification
-// 80-V9418-8 Rev. B
-// March 4, 2008
-//===----------------------------------------------------------------------===//
-class T_I_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID imm:$Is),
- (MI imm:$Is)>;
+// These intrinsic patterns are not auto-generated.
class T_R_pat <InstHexagon MI, Intrinsic IntID>
: Pat <(IntID I32:$Rs),
(MI I32:$Rs)>;
-class T_P_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs),
- (MI I64:$Rs)>;
-
-class T_II_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
- : Pat<(IntID Imm1:$Is, Imm2:$It),
- (MI Imm1:$Is, Imm2:$It)>;
-
-class T_RI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID I32:$Rs, ImmPred:$It),
- (MI I32:$Rs, ImmPred:$It)>;
-
-class T_IR_pat <InstHexagon MI, Intrinsic IntID,
- PatFrag ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID ImmPred:$Is, I32:$Rt),
- (MI ImmPred:$Is, I32:$Rt)>;
-
-class T_PI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID I64:$Rs, imm:$It),
- (MI I64:$Rs, imm:$It)>;
-
-class T_RP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID I32:$Rs, I64:$Rt),
- (MI I32:$Rs, I64:$Rt)>;
-
class T_RR_pat <InstHexagon MI, Intrinsic IntID>
: Pat <(IntID I32:$Rs, I32:$Rt),
(MI I32:$Rs, I32:$Rt)>;
-class T_PP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt),
- (MI I64:$Rs, I64:$Rt)>;
-
-class T_QQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt),
- (MI (C2_tfrrp I32:$Rs), (C2_tfrrp I32:$Rt))>;
-
-class T_QII_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
- : Pat <(IntID I32:$Rp, Imm1:$Is, Imm2:$It),
- (MI (C2_tfrrp I32:$Rp), Imm1:$Is, Imm2:$It)>;
-
-class T_QRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I32:$Rs, I32:$Rt),
- (MI (C2_tfrrp I32:$Rp), I32:$Rs, I32:$Rt)>;
-
-class T_QRI_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
- : Pat <(IntID I32:$Rp, I32:$Rs, ImmPred:$Is),
- (MI (C2_tfrrp I32:$Rp), I32:$Rs, ImmPred:$Is)>;
-
-class T_QIR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
- : Pat <(IntID I32:$Rp, ImmPred:$Is, I32:$Rs),
- (MI (C2_tfrrp I32:$Rp), ImmPred:$Is, I32:$Rs)>;
-
-class T_QPP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I64:$Rs, I64:$Rt),
- (MI (C2_tfrrp I32:$Rp), I64:$Rs, I64:$Rt)>;
-
-class T_RRI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu),
- (MI I32:$Rs, I32:$Rt, imm:$Iu)>;
-
-class T_RII_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, imm:$It, imm:$Iu),
- (MI I32:$Rs, imm:$It, imm:$Iu)>;
-
-class T_IRI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID imm:$It, I32:$Rs, imm:$Iu),
- (MI imm:$It, I32:$Rs, imm:$Iu)>;
-
-class T_IRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID imm:$Is, I32:$Rs, I32:$Rt),
- (MI imm:$Is, I32:$Rs, I32:$Rt)>;
-
-class T_RIR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, imm:$Is, I32:$Rt),
- (MI I32:$Rs, imm:$Is, I32:$Rt)>;
-
-class T_RRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt, I32:$Ru),
- (MI I32:$Rs, I32:$Rt, I32:$Ru)>;
-
-class T_PPI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu),
- (MI I64:$Rs, I64:$Rt, imm:$Iu)>;
-
-class T_PII_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu),
- (MI I64:$Rs, imm:$It, imm:$Iu)>;
-
-class T_PPP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru),
- (MI I64:$Rs, I64:$Rt, I64:$Ru)>;
-
-class T_PPR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru),
- (MI I64:$Rs, I64:$Rt, I32:$Ru)>;
-
-class T_PRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I32:$Rt, I32:$Ru),
- (MI I64:$Rs, I32:$Rt, I32:$Ru)>;
-
-class T_PPQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Rp),
- (MI I64:$Rs, I64:$Rt, (C2_tfrrp I32:$Rp))>;
-
-class T_PR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I32:$Rt),
- (MI I64:$Rs, I32:$Rt)>;
-
-class T_D_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID (F64:$Rs)),
- (MI (F64:$Rs))>;
-
-class T_DI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID F64:$Rs, ImmPred:$It),
- (MI F64:$Rs, ImmPred:$It)>;
-
-class T_F_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs),
- (MI F32:$Rs)>;
-
-class T_FI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID F32:$Rs, ImmPred:$It),
- (MI F32:$Rs, ImmPred:$It)>;
-
-class T_FF_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, F32:$Rt),
- (MI F32:$Rs, F32:$Rt)>;
-
-class T_DD_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F64:$Rs, F64:$Rt),
- (MI F64:$Rs, F64:$Rt)>;
-
-class T_FFF_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, F32:$Rt, F32:$Ru),
- (MI F32:$Rs, F32:$Rt, F32:$Ru)>;
-
-class T_FFFQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, I32:$Rp),
- (MI F32:$Rs, F32:$Rt, F32:$Ru, (C2_tfrrp I32:$Rp))>;
-
-class T_Q_RI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID I32:$Rs, ImmPred:$It),
- (C2_tfrpr (MI I32:$Rs, ImmPred:$It))>;
-
-class T_Q_RR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt),
- (C2_tfrpr (MI I32:$Rs, I32:$Rt))>;
-
-class T_Q_RP_pat <InstHexagon MI, Intrinsic IntID>
+class T_RP_pat <InstHexagon MI, Intrinsic IntID>
: Pat <(IntID I32:$Rs, I64:$Rt),
- (C2_tfrpr (MI I32:$Rs, I64:$Rt))>;
-
-class T_Q_PR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I32:$Rt),
- (C2_tfrpr (MI I64:$Rs, I32:$Rt))>;
-
-class T_Q_PI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID I64:$Rs, imm:$It),
- (C2_tfrpr (MI I64:$Rs, imm:$It))>;
-
-class T_Q_PP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt),
- (C2_tfrpr (MI I64:$Rs, I64:$Rt))>;
-
-class T_Q_Q_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp),
- (C2_tfrpr (MI (C2_tfrrp I32:$Rp)))>;
-
-class T_Q_QQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I32:$Rq),
- (C2_tfrpr (MI (C2_tfrrp I32:$Rp), (C2_tfrrp I32:$Rq)))>;
-
-class T_Q_FF_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, F32:$Rt),
- (C2_tfrpr (MI F32:$Rs, F32:$Rt))>;
-
-class T_Q_DD_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F64:$Rs, F64:$Rt),
- (C2_tfrpr (MI F64:$Rs, F64:$Rt))>;
-
-class T_Q_FI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, imm:$It),
- (C2_tfrpr (MI F32:$Rs, imm:$It))>;
-
-class T_Q_DI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F64:$Rs, imm:$It),
- (C2_tfrpr (MI F64:$Rs, imm:$It))>;
-
-class T_Q_QQQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I32:$Rq, I32:$Rs),
- (C2_tfrpr (MI (C2_tfrrp I32:$Rp), (C2_tfrrp I32:$Rq),
- (C2_tfrrp I32:$Rs)))>;
-
-//===----------------------------------------------------------------------===//
-// MPYS / Multipy signed/unsigned halfwords
-//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
-//===----------------------------------------------------------------------===//
-
-def : T_RR_pat <M2_mpy_ll_s1, int_hexagon_M2_mpy_ll_s1>;
-def : T_RR_pat <M2_mpy_ll_s0, int_hexagon_M2_mpy_ll_s0>;
-def : T_RR_pat <M2_mpy_lh_s1, int_hexagon_M2_mpy_lh_s1>;
-def : T_RR_pat <M2_mpy_lh_s0, int_hexagon_M2_mpy_lh_s0>;
-def : T_RR_pat <M2_mpy_hl_s1, int_hexagon_M2_mpy_hl_s1>;
-def : T_RR_pat <M2_mpy_hl_s0, int_hexagon_M2_mpy_hl_s0>;
-def : T_RR_pat <M2_mpy_hh_s1, int_hexagon_M2_mpy_hh_s1>;
-def : T_RR_pat <M2_mpy_hh_s0, int_hexagon_M2_mpy_hh_s0>;
-
-def : T_RR_pat <M2_mpyu_ll_s1, int_hexagon_M2_mpyu_ll_s1>;
-def : T_RR_pat <M2_mpyu_ll_s0, int_hexagon_M2_mpyu_ll_s0>;
-def : T_RR_pat <M2_mpyu_lh_s1, int_hexagon_M2_mpyu_lh_s1>;
-def : T_RR_pat <M2_mpyu_lh_s0, int_hexagon_M2_mpyu_lh_s0>;
-def : T_RR_pat <M2_mpyu_hl_s1, int_hexagon_M2_mpyu_hl_s1>;
-def : T_RR_pat <M2_mpyu_hl_s0, int_hexagon_M2_mpyu_hl_s0>;
-def : T_RR_pat <M2_mpyu_hh_s1, int_hexagon_M2_mpyu_hh_s1>;
-def : T_RR_pat <M2_mpyu_hh_s0, int_hexagon_M2_mpyu_hh_s0>;
-
-def : T_RR_pat <M2_mpy_sat_ll_s1, int_hexagon_M2_mpy_sat_ll_s1>;
-def : T_RR_pat <M2_mpy_sat_ll_s0, int_hexagon_M2_mpy_sat_ll_s0>;
-def : T_RR_pat <M2_mpy_sat_lh_s1, int_hexagon_M2_mpy_sat_lh_s1>;
-def : T_RR_pat <M2_mpy_sat_lh_s0, int_hexagon_M2_mpy_sat_lh_s0>;
-def : T_RR_pat <M2_mpy_sat_hl_s1, int_hexagon_M2_mpy_sat_hl_s1>;
-def : T_RR_pat <M2_mpy_sat_hl_s0, int_hexagon_M2_mpy_sat_hl_s0>;
-def : T_RR_pat <M2_mpy_sat_hh_s1, int_hexagon_M2_mpy_sat_hh_s1>;
-def : T_RR_pat <M2_mpy_sat_hh_s0, int_hexagon_M2_mpy_sat_hh_s0>;
-
-def : T_RR_pat <M2_mpy_rnd_ll_s1, int_hexagon_M2_mpy_rnd_ll_s1>;
-def : T_RR_pat <M2_mpy_rnd_ll_s0, int_hexagon_M2_mpy_rnd_ll_s0>;
-def : T_RR_pat <M2_mpy_rnd_lh_s1, int_hexagon_M2_mpy_rnd_lh_s1>;
-def : T_RR_pat <M2_mpy_rnd_lh_s0, int_hexagon_M2_mpy_rnd_lh_s0>;
-def : T_RR_pat <M2_mpy_rnd_hl_s1, int_hexagon_M2_mpy_rnd_hl_s1>;
-def : T_RR_pat <M2_mpy_rnd_hl_s0, int_hexagon_M2_mpy_rnd_hl_s0>;
-def : T_RR_pat <M2_mpy_rnd_hh_s1, int_hexagon_M2_mpy_rnd_hh_s1>;
-def : T_RR_pat <M2_mpy_rnd_hh_s0, int_hexagon_M2_mpy_rnd_hh_s0>;
-
-def : T_RR_pat <M2_mpy_sat_rnd_ll_s1, int_hexagon_M2_mpy_sat_rnd_ll_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_ll_s0, int_hexagon_M2_mpy_sat_rnd_ll_s0>;
-def : T_RR_pat <M2_mpy_sat_rnd_lh_s1, int_hexagon_M2_mpy_sat_rnd_lh_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_lh_s0, int_hexagon_M2_mpy_sat_rnd_lh_s0>;
-def : T_RR_pat <M2_mpy_sat_rnd_hl_s1, int_hexagon_M2_mpy_sat_rnd_hl_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_hl_s0, int_hexagon_M2_mpy_sat_rnd_hl_s0>;
-def : T_RR_pat <M2_mpy_sat_rnd_hh_s1, int_hexagon_M2_mpy_sat_rnd_hh_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_hh_s0, int_hexagon_M2_mpy_sat_rnd_hh_s0>;
-
-
-//===----------------------------------------------------------------------===//
-// MPYS / Multipy signed/unsigned halfwords and add/subtract the
-// result from the accumulator.
-//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//===----------------------------------------------------------------------===//
-
-def : T_RRR_pat <M2_mpy_acc_ll_s1, int_hexagon_M2_mpy_acc_ll_s1>;
-def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>;
-def : T_RRR_pat <M2_mpy_acc_lh_s1, int_hexagon_M2_mpy_acc_lh_s1>;
-def : T_RRR_pat <M2_mpy_acc_lh_s0, int_hexagon_M2_mpy_acc_lh_s0>;
-def : T_RRR_pat <M2_mpy_acc_hl_s1, int_hexagon_M2_mpy_acc_hl_s1>;
-def : T_RRR_pat <M2_mpy_acc_hl_s0, int_hexagon_M2_mpy_acc_hl_s0>;
-def : T_RRR_pat <M2_mpy_acc_hh_s1, int_hexagon_M2_mpy_acc_hh_s1>;
-def : T_RRR_pat <M2_mpy_acc_hh_s0, int_hexagon_M2_mpy_acc_hh_s0>;
-
-def : T_RRR_pat <M2_mpyu_acc_ll_s1, int_hexagon_M2_mpyu_acc_ll_s1>;
-def : T_RRR_pat <M2_mpyu_acc_ll_s0, int_hexagon_M2_mpyu_acc_ll_s0>;
-def : T_RRR_pat <M2_mpyu_acc_lh_s1, int_hexagon_M2_mpyu_acc_lh_s1>;
-def : T_RRR_pat <M2_mpyu_acc_lh_s0, int_hexagon_M2_mpyu_acc_lh_s0>;
-def : T_RRR_pat <M2_mpyu_acc_hl_s1, int_hexagon_M2_mpyu_acc_hl_s1>;
-def : T_RRR_pat <M2_mpyu_acc_hl_s0, int_hexagon_M2_mpyu_acc_hl_s0>;
-def : T_RRR_pat <M2_mpyu_acc_hh_s1, int_hexagon_M2_mpyu_acc_hh_s1>;
-def : T_RRR_pat <M2_mpyu_acc_hh_s0, int_hexagon_M2_mpyu_acc_hh_s0>;
-
-def : T_RRR_pat <M2_mpy_nac_ll_s1, int_hexagon_M2_mpy_nac_ll_s1>;
-def : T_RRR_pat <M2_mpy_nac_ll_s0, int_hexagon_M2_mpy_nac_ll_s0>;
-def : T_RRR_pat <M2_mpy_nac_lh_s1, int_hexagon_M2_mpy_nac_lh_s1>;
-def : T_RRR_pat <M2_mpy_nac_lh_s0, int_hexagon_M2_mpy_nac_lh_s0>;
-def : T_RRR_pat <M2_mpy_nac_hl_s1, int_hexagon_M2_mpy_nac_hl_s1>;
-def : T_RRR_pat <M2_mpy_nac_hl_s0, int_hexagon_M2_mpy_nac_hl_s0>;
-def : T_RRR_pat <M2_mpy_nac_hh_s1, int_hexagon_M2_mpy_nac_hh_s1>;
-def : T_RRR_pat <M2_mpy_nac_hh_s0, int_hexagon_M2_mpy_nac_hh_s0>;
-
-def : T_RRR_pat <M2_mpyu_nac_ll_s1, int_hexagon_M2_mpyu_nac_ll_s1>;
-def : T_RRR_pat <M2_mpyu_nac_ll_s0, int_hexagon_M2_mpyu_nac_ll_s0>;
-def : T_RRR_pat <M2_mpyu_nac_lh_s1, int_hexagon_M2_mpyu_nac_lh_s1>;
-def : T_RRR_pat <M2_mpyu_nac_lh_s0, int_hexagon_M2_mpyu_nac_lh_s0>;
-def : T_RRR_pat <M2_mpyu_nac_hl_s1, int_hexagon_M2_mpyu_nac_hl_s1>;
-def : T_RRR_pat <M2_mpyu_nac_hl_s0, int_hexagon_M2_mpyu_nac_hl_s0>;
-def : T_RRR_pat <M2_mpyu_nac_hh_s1, int_hexagon_M2_mpyu_nac_hh_s1>;
-def : T_RRR_pat <M2_mpyu_nac_hh_s0, int_hexagon_M2_mpyu_nac_hh_s0>;
-
-def : T_RRR_pat <M2_mpy_acc_sat_ll_s1, int_hexagon_M2_mpy_acc_sat_ll_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_ll_s0, int_hexagon_M2_mpy_acc_sat_ll_s0>;
-def : T_RRR_pat <M2_mpy_acc_sat_lh_s1, int_hexagon_M2_mpy_acc_sat_lh_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_lh_s0, int_hexagon_M2_mpy_acc_sat_lh_s0>;
-def : T_RRR_pat <M2_mpy_acc_sat_hl_s1, int_hexagon_M2_mpy_acc_sat_hl_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_hl_s0, int_hexagon_M2_mpy_acc_sat_hl_s0>;
-def : T_RRR_pat <M2_mpy_acc_sat_hh_s1, int_hexagon_M2_mpy_acc_sat_hh_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_hh_s0, int_hexagon_M2_mpy_acc_sat_hh_s0>;
-
-def : T_RRR_pat <M2_mpy_nac_sat_ll_s1, int_hexagon_M2_mpy_nac_sat_ll_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_ll_s0, int_hexagon_M2_mpy_nac_sat_ll_s0>;
-def : T_RRR_pat <M2_mpy_nac_sat_lh_s1, int_hexagon_M2_mpy_nac_sat_lh_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_lh_s0, int_hexagon_M2_mpy_nac_sat_lh_s0>;
-def : T_RRR_pat <M2_mpy_nac_sat_hl_s1, int_hexagon_M2_mpy_nac_sat_hl_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_hl_s0, int_hexagon_M2_mpy_nac_sat_hl_s0>;
-def : T_RRR_pat <M2_mpy_nac_sat_hh_s1, int_hexagon_M2_mpy_nac_sat_hh_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_hh_s0, int_hexagon_M2_mpy_nac_sat_hh_s0>;
-
-
-//===----------------------------------------------------------------------===//
-// Multiply signed/unsigned halfwords with and without saturation and rounding
-// into a 64-bits destination register.
-//===----------------------------------------------------------------------===//
-
-def : T_RR_pat <M2_mpyd_hh_s0, int_hexagon_M2_mpyd_hh_s0>;
-def : T_RR_pat <M2_mpyd_hl_s0, int_hexagon_M2_mpyd_hl_s0>;
-def : T_RR_pat <M2_mpyd_lh_s0, int_hexagon_M2_mpyd_lh_s0>;
-def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>;
-def : T_RR_pat <M2_mpyd_hh_s1, int_hexagon_M2_mpyd_hh_s1>;
-def : T_RR_pat <M2_mpyd_hl_s1, int_hexagon_M2_mpyd_hl_s1>;
-def : T_RR_pat <M2_mpyd_lh_s1, int_hexagon_M2_mpyd_lh_s1>;
-def : T_RR_pat <M2_mpyd_ll_s1, int_hexagon_M2_mpyd_ll_s1>;
-
-def : T_RR_pat <M2_mpyd_rnd_hh_s0, int_hexagon_M2_mpyd_rnd_hh_s0>;
-def : T_RR_pat <M2_mpyd_rnd_hl_s0, int_hexagon_M2_mpyd_rnd_hl_s0>;
-def : T_RR_pat <M2_mpyd_rnd_lh_s0, int_hexagon_M2_mpyd_rnd_lh_s0>;
-def : T_RR_pat <M2_mpyd_rnd_ll_s0, int_hexagon_M2_mpyd_rnd_ll_s0>;
-def : T_RR_pat <M2_mpyd_rnd_hh_s1, int_hexagon_M2_mpyd_rnd_hh_s1>;
-def : T_RR_pat <M2_mpyd_rnd_hl_s1, int_hexagon_M2_mpyd_rnd_hl_s1>;
-def : T_RR_pat <M2_mpyd_rnd_lh_s1, int_hexagon_M2_mpyd_rnd_lh_s1>;
-def : T_RR_pat <M2_mpyd_rnd_ll_s1, int_hexagon_M2_mpyd_rnd_ll_s1>;
-
-def : T_RR_pat <M2_mpyud_hh_s0, int_hexagon_M2_mpyud_hh_s0>;
-def : T_RR_pat <M2_mpyud_hl_s0, int_hexagon_M2_mpyud_hl_s0>;
-def : T_RR_pat <M2_mpyud_lh_s0, int_hexagon_M2_mpyud_lh_s0>;
-def : T_RR_pat <M2_mpyud_ll_s0, int_hexagon_M2_mpyud_ll_s0>;
-def : T_RR_pat <M2_mpyud_hh_s1, int_hexagon_M2_mpyud_hh_s1>;
-def : T_RR_pat <M2_mpyud_hl_s1, int_hexagon_M2_mpyud_hl_s1>;
-def : T_RR_pat <M2_mpyud_lh_s1, int_hexagon_M2_mpyud_lh_s1>;
-def : T_RR_pat <M2_mpyud_ll_s1, int_hexagon_M2_mpyud_ll_s1>;
-
-//===----------------------------------------------------------------------===//
-// MPYS / Multipy signed/unsigned halfwords and add/subtract the
-// result from the 64-bit destination register.
-//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//===----------------------------------------------------------------------===//
-
-def : T_PRR_pat <M2_mpyd_acc_hh_s0, int_hexagon_M2_mpyd_acc_hh_s0>;
-def : T_PRR_pat <M2_mpyd_acc_hl_s0, int_hexagon_M2_mpyd_acc_hl_s0>;
-def : T_PRR_pat <M2_mpyd_acc_lh_s0, int_hexagon_M2_mpyd_acc_lh_s0>;
-def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>;
-
-def : T_PRR_pat <M2_mpyd_acc_hh_s1, int_hexagon_M2_mpyd_acc_hh_s1>;
-def : T_PRR_pat <M2_mpyd_acc_hl_s1, int_hexagon_M2_mpyd_acc_hl_s1>;
-def : T_PRR_pat <M2_mpyd_acc_lh_s1, int_hexagon_M2_mpyd_acc_lh_s1>;
-def : T_PRR_pat <M2_mpyd_acc_ll_s1, int_hexagon_M2_mpyd_acc_ll_s1>;
-
-def : T_PRR_pat <M2_mpyd_nac_hh_s0, int_hexagon_M2_mpyd_nac_hh_s0>;
-def : T_PRR_pat <M2_mpyd_nac_hl_s0, int_hexagon_M2_mpyd_nac_hl_s0>;
-def : T_PRR_pat <M2_mpyd_nac_lh_s0, int_hexagon_M2_mpyd_nac_lh_s0>;
-def : T_PRR_pat <M2_mpyd_nac_ll_s0, int_hexagon_M2_mpyd_nac_ll_s0>;
-
-def : T_PRR_pat <M2_mpyd_nac_hh_s1, int_hexagon_M2_mpyd_nac_hh_s1>;
-def : T_PRR_pat <M2_mpyd_nac_hl_s1, int_hexagon_M2_mpyd_nac_hl_s1>;
-def : T_PRR_pat <M2_mpyd_nac_lh_s1, int_hexagon_M2_mpyd_nac_lh_s1>;
-def : T_PRR_pat <M2_mpyd_nac_ll_s1, int_hexagon_M2_mpyd_nac_ll_s1>;
-
-def : T_PRR_pat <M2_mpyud_acc_hh_s0, int_hexagon_M2_mpyud_acc_hh_s0>;
-def : T_PRR_pat <M2_mpyud_acc_hl_s0, int_hexagon_M2_mpyud_acc_hl_s0>;
-def : T_PRR_pat <M2_mpyud_acc_lh_s0, int_hexagon_M2_mpyud_acc_lh_s0>;
-def : T_PRR_pat <M2_mpyud_acc_ll_s0, int_hexagon_M2_mpyud_acc_ll_s0>;
-
-def : T_PRR_pat <M2_mpyud_acc_hh_s1, int_hexagon_M2_mpyud_acc_hh_s1>;
-def : T_PRR_pat <M2_mpyud_acc_hl_s1, int_hexagon_M2_mpyud_acc_hl_s1>;
-def : T_PRR_pat <M2_mpyud_acc_lh_s1, int_hexagon_M2_mpyud_acc_lh_s1>;
-def : T_PRR_pat <M2_mpyud_acc_ll_s1, int_hexagon_M2_mpyud_acc_ll_s1>;
-
-def : T_PRR_pat <M2_mpyud_nac_hh_s0, int_hexagon_M2_mpyud_nac_hh_s0>;
-def : T_PRR_pat <M2_mpyud_nac_hl_s0, int_hexagon_M2_mpyud_nac_hl_s0>;
-def : T_PRR_pat <M2_mpyud_nac_lh_s0, int_hexagon_M2_mpyud_nac_lh_s0>;
-def : T_PRR_pat <M2_mpyud_nac_ll_s0, int_hexagon_M2_mpyud_nac_ll_s0>;
-
-def : T_PRR_pat <M2_mpyud_nac_hh_s1, int_hexagon_M2_mpyud_nac_hh_s1>;
-def : T_PRR_pat <M2_mpyud_nac_hl_s1, int_hexagon_M2_mpyud_nac_hl_s1>;
-def : T_PRR_pat <M2_mpyud_nac_lh_s1, int_hexagon_M2_mpyud_nac_lh_s1>;
-def : T_PRR_pat <M2_mpyud_nac_ll_s1, int_hexagon_M2_mpyud_nac_ll_s1>;
-
-// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vcmpy_s1_sat_i, int_hexagon_M2_vcmpy_s1_sat_i>;
-def : T_PP_pat <M2_vcmpy_s0_sat_i, int_hexagon_M2_vcmpy_s0_sat_i>;
-
-// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vcmpy_s1_sat_r, int_hexagon_M2_vcmpy_s1_sat_r>;
-def : T_PP_pat <M2_vcmpy_s0_sat_r, int_hexagon_M2_vcmpy_s0_sat_r>;
-
-// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vdmpys_s1, int_hexagon_M2_vdmpys_s1>;
-def : T_PP_pat <M2_vdmpys_s0, int_hexagon_M2_vdmpys_s0>;
-
-// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vmpy2es_s1, int_hexagon_M2_vmpy2es_s1>;
-def : T_PP_pat <M2_vmpy2es_s0, int_hexagon_M2_vmpy2es_s0>;
-
-//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyh_s0, int_hexagon_M2_mmpyh_s0>;
-def : T_PP_pat <M2_mmpyh_s1, int_hexagon_M2_mmpyh_s1>;
-def : T_PP_pat <M2_mmpyh_rs0, int_hexagon_M2_mmpyh_rs0>;
-def : T_PP_pat <M2_mmpyh_rs1, int_hexagon_M2_mmpyh_rs1>;
-
-//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyl_s0, int_hexagon_M2_mmpyl_s0>;
-def : T_PP_pat <M2_mmpyl_s1, int_hexagon_M2_mmpyl_s1>;
-def : T_PP_pat <M2_mmpyl_rs0, int_hexagon_M2_mmpyl_rs0>;
-def : T_PP_pat <M2_mmpyl_rs1, int_hexagon_M2_mmpyl_rs1>;
-
-//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyuh_s0, int_hexagon_M2_mmpyuh_s0>;
-def : T_PP_pat <M2_mmpyuh_s1, int_hexagon_M2_mmpyuh_s1>;
-def : T_PP_pat <M2_mmpyuh_rs0, int_hexagon_M2_mmpyuh_rs0>;
-def : T_PP_pat <M2_mmpyuh_rs1, int_hexagon_M2_mmpyuh_rs1>;
-
-//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyul_s0, int_hexagon_M2_mmpyul_s0>;
-def : T_PP_pat <M2_mmpyul_s1, int_hexagon_M2_mmpyul_s1>;
-def : T_PP_pat <M2_mmpyul_rs0, int_hexagon_M2_mmpyul_rs0>;
-def : T_PP_pat <M2_mmpyul_rs1, int_hexagon_M2_mmpyul_rs1>;
-
-// Vector reduce add unsigned bytes: Rdd32[+]=vrmpybu(Rss32,Rtt32)
-def : T_PP_pat <A2_vraddub, int_hexagon_A2_vraddub>;
-def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>;
-
-// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt)
-def : T_PP_pat <A2_vrsadub, int_hexagon_A2_vrsadub>;
-def : T_PPP_pat <A2_vrsadub_acc, int_hexagon_A2_vrsadub_acc>;
-
-// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss)
-def : T_PP_pat <M2_vabsdiffh, int_hexagon_M2_vabsdiffh>;
-
-// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
-def : T_PP_pat <M2_vabsdiffw, int_hexagon_M2_vabsdiffw>;
-
-// Vector reduce complex multiply real or imaginary:
-// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
-def : T_PP_pat <M2_vrcmpyi_s0, int_hexagon_M2_vrcmpyi_s0>;
-def : T_PP_pat <M2_vrcmpyi_s0c, int_hexagon_M2_vrcmpyi_s0c>;
-def : T_PPP_pat <M2_vrcmaci_s0, int_hexagon_M2_vrcmaci_s0>;
-def : T_PPP_pat <M2_vrcmaci_s0c, int_hexagon_M2_vrcmaci_s0c>;
-
-def : T_PP_pat <M2_vrcmpyr_s0, int_hexagon_M2_vrcmpyr_s0>;
-def : T_PP_pat <M2_vrcmpyr_s0c, int_hexagon_M2_vrcmpyr_s0c>;
-def : T_PPP_pat <M2_vrcmacr_s0, int_hexagon_M2_vrcmacr_s0>;
-def : T_PPP_pat <M2_vrcmacr_s0c, int_hexagon_M2_vrcmacr_s0c>;
-
-// Vector reduce halfwords
-// Rdd[+]=vrmpyh(Rss,Rtt)
-def : T_PP_pat <M2_vrmpy_s0, int_hexagon_M2_vrmpy_s0>;
-def : T_PPP_pat <M2_vrmac_s0, int_hexagon_M2_vrmac_s0>;
-
-//===----------------------------------------------------------------------===//
-// Vector Multipy with accumulation
-//===----------------------------------------------------------------------===//
-
-// Vector multiply word by signed half with accumulation
-// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PPP_pat <M2_mmacls_s1, int_hexagon_M2_mmacls_s1>;
-def : T_PPP_pat <M2_mmacls_s0, int_hexagon_M2_mmacls_s0>;
-def : T_PPP_pat <M2_mmacls_rs1, int_hexagon_M2_mmacls_rs1>;
-def : T_PPP_pat <M2_mmacls_rs0, int_hexagon_M2_mmacls_rs0>;
-def : T_PPP_pat <M2_mmachs_s1, int_hexagon_M2_mmachs_s1>;
-def : T_PPP_pat <M2_mmachs_s0, int_hexagon_M2_mmachs_s0>;
-def : T_PPP_pat <M2_mmachs_rs1, int_hexagon_M2_mmachs_rs1>;
-def : T_PPP_pat <M2_mmachs_rs0, int_hexagon_M2_mmachs_rs0>;
-
-// Vector multiply word by unsigned half with accumulation
-// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PPP_pat <M2_mmaculs_s1, int_hexagon_M2_mmaculs_s1>;
-def : T_PPP_pat <M2_mmaculs_s0, int_hexagon_M2_mmaculs_s0>;
-def : T_PPP_pat <M2_mmaculs_rs1, int_hexagon_M2_mmaculs_rs1>;
-def : T_PPP_pat <M2_mmaculs_rs0, int_hexagon_M2_mmaculs_rs0>;
-def : T_PPP_pat <M2_mmacuhs_s1, int_hexagon_M2_mmacuhs_s1>;
-def : T_PPP_pat <M2_mmacuhs_s0, int_hexagon_M2_mmacuhs_s0>;
-def : T_PPP_pat <M2_mmacuhs_rs1, int_hexagon_M2_mmacuhs_rs1>;
-def : T_PPP_pat <M2_mmacuhs_rs0, int_hexagon_M2_mmacuhs_rs0>;
-
-// Vector multiply even halfwords with accumulation
-// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
-def : T_PPP_pat <M2_vmac2es, int_hexagon_M2_vmac2es>;
-def : T_PPP_pat <M2_vmac2es_s1, int_hexagon_M2_vmac2es_s1>;
-def : T_PPP_pat <M2_vmac2es_s0, int_hexagon_M2_vmac2es_s0>;
-
-// Vector dual multiply with accumulation
-// Rxx+=vdmpy(Rss,Rtt)[:sat]
-def : T_PPP_pat <M2_vdmacs_s1, int_hexagon_M2_vdmacs_s1>;
-def : T_PPP_pat <M2_vdmacs_s0, int_hexagon_M2_vdmacs_s0>;
-
-// Vector complex multiply real or imaginary with accumulation
-// Rxx+=vcmpy[ir](Rss,Rtt):sat
-def : T_PPP_pat <M2_vcmac_s0_sat_r, int_hexagon_M2_vcmac_s0_sat_r>;
-def : T_PPP_pat <M2_vcmac_s0_sat_i, int_hexagon_M2_vcmac_s0_sat_i>;
-
-//===----------------------------------------------------------------------===//
-// Add/Subtract halfword
-// Rd=add(Rt.L,Rs.[HL])[:sat]
-// Rd=sub(Rt.L,Rs.[HL])[:sat]
-// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16]
-// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16]
-//===----------------------------------------------------------------------===//
-
-//Rd=add(Rt.L,Rs.[LH])
-def : T_RR_pat <A2_addh_l16_ll, int_hexagon_A2_addh_l16_ll>;
-def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>;
-
-//Rd=add(Rt.L,Rs.[LH]):sat
-def : T_RR_pat <A2_addh_l16_sat_ll, int_hexagon_A2_addh_l16_sat_ll>;
-def : T_RR_pat <A2_addh_l16_sat_hl, int_hexagon_A2_addh_l16_sat_hl>;
-
-//Rd=sub(Rt.L,Rs.[LH])
-def : T_RR_pat <A2_subh_l16_ll, int_hexagon_A2_subh_l16_ll>;
-def : T_RR_pat <A2_subh_l16_hl, int_hexagon_A2_subh_l16_hl>;
-
-//Rd=sub(Rt.L,Rs.[LH]):sat
-def : T_RR_pat <A2_subh_l16_sat_ll, int_hexagon_A2_subh_l16_sat_ll>;
-def : T_RR_pat <A2_subh_l16_sat_hl, int_hexagon_A2_subh_l16_sat_hl>;
-
-//Rd=add(Rt.[LH],Rs.[LH]):<<16
-def : T_RR_pat <A2_addh_h16_ll, int_hexagon_A2_addh_h16_ll>;
-def : T_RR_pat <A2_addh_h16_lh, int_hexagon_A2_addh_h16_lh>;
-def : T_RR_pat <A2_addh_h16_hl, int_hexagon_A2_addh_h16_hl>;
-def : T_RR_pat <A2_addh_h16_hh, int_hexagon_A2_addh_h16_hh>;
-
-//Rd=sub(Rt.[LH],Rs.[LH]):<<16
-def : T_RR_pat <A2_subh_h16_ll, int_hexagon_A2_subh_h16_ll>;
-def : T_RR_pat <A2_subh_h16_lh, int_hexagon_A2_subh_h16_lh>;
-def : T_RR_pat <A2_subh_h16_hl, int_hexagon_A2_subh_h16_hl>;
-def : T_RR_pat <A2_subh_h16_hh, int_hexagon_A2_subh_h16_hh>;
-
-//Rd=add(Rt.[LH],Rs.[LH]):sat:<<16
-def : T_RR_pat <A2_addh_h16_sat_ll, int_hexagon_A2_addh_h16_sat_ll>;
-def : T_RR_pat <A2_addh_h16_sat_lh, int_hexagon_A2_addh_h16_sat_lh>;
-def : T_RR_pat <A2_addh_h16_sat_hl, int_hexagon_A2_addh_h16_sat_hl>;
-def : T_RR_pat <A2_addh_h16_sat_hh, int_hexagon_A2_addh_h16_sat_hh>;
-
-//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
-def : T_RR_pat <A2_subh_h16_sat_ll, int_hexagon_A2_subh_h16_sat_ll>;
-def : T_RR_pat <A2_subh_h16_sat_lh, int_hexagon_A2_subh_h16_sat_lh>;
-def : T_RR_pat <A2_subh_h16_sat_hl, int_hexagon_A2_subh_h16_sat_hl>;
-def : T_RR_pat <A2_subh_h16_sat_hh, int_hexagon_A2_subh_h16_sat_hh>;
-
-// ALU64 / ALU / min max
-def : T_RR_pat<A2_max, int_hexagon_A2_max>;
-def : T_RR_pat<A2_min, int_hexagon_A2_min>;
-def : T_RR_pat<A2_maxu, int_hexagon_A2_maxu>;
-def : T_RR_pat<A2_minu, int_hexagon_A2_minu>;
-
-// Shift and accumulate
-def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>;
-def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>;
-def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>;
-def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>;
-def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>;
-def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>;
-
-def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>;
-def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>;
-def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>;
-def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>;
-def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>;
-def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>;
-
-def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>;
-def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>;
-def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>;
-def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>;
-def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>;
-def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>;
-
-def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>;
-def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>;
-def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>;
-def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>;
-def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>;
-def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>;
-
-def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>;
-def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>;
-def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>;
-def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>;
-def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>;
-def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>;
-def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>;
-def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>;
-
-def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>;
-def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>;
-def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>;
-def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>;
-def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>;
-def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>;
-def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>;
-def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>;
-
-def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>;
-def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>;
-def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>;
-def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>;
-def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>;
-def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>;
-def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>;
-def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>;
-
-def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>;
-def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>;
-def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>;
-def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>;
-def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>;
-def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>;
-def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>;
-def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
-
-def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>;
-def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>;
-def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>;
-def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>;
-def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>;
-def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>;
-
-def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>;
-def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>;
-def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>;
-def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>;
-def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>;
-def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>;
-
-def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>;
-def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>;
-def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>;
-def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>;
-def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>;
-def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>;
-
-def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>;
-def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>;
-def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>;
-def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>;
-def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>;
-def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>;
-
-def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>;
-def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>;
-def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>;
-def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>;
-def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>;
-def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>;
-def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>;
-def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>;
-
-def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>;
-def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>;
-def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>;
-def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>;
-def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>;
-def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>;
-def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>;
-def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>;
-
-def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>;
-def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>;
-def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>;
-def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>;
-def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>;
-def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>;
-def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>;
-def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>;
-
-def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>;
-def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>;
-def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>;
-def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>;
-def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>;
-def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>;
-def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>;
-def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
-
-//*******************************************************************
-// ALU32/ALU
-//*******************************************************************
-def : T_RR_pat<A2_add, int_hexagon_A2_add>;
-def : T_RI_pat<A2_addi, int_hexagon_A2_addi>;
-def : T_RR_pat<A2_sub, int_hexagon_A2_sub>;
-def : T_IR_pat<A2_subri, int_hexagon_A2_subri>;
-def : T_RR_pat<A2_and, int_hexagon_A2_and>;
-def : T_RI_pat<A2_andir, int_hexagon_A2_andir>;
-def : T_RR_pat<A2_or, int_hexagon_A2_or>;
-def : T_RI_pat<A2_orir, int_hexagon_A2_orir>;
-def : T_RR_pat<A2_xor, int_hexagon_A2_xor>;
-def : T_RR_pat<A2_combinew, int_hexagon_A2_combinew>;
+ (MI I32:$Rs, I64:$Rt)>;
+
+def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt),
+ (A2_add IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, imm:$s16),
+ (A2_addi IntRegs:$Rs, imm:$s16)>;
+def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt),
+ (A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt),
+ (A2_sub IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_subri imm:$s10, IntRegs:$Rs),
+ (A2_subri imm:$s10, IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt),
+ (A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+def: Pat<(int_hexagon_M2_mpyi IntRegs:$Rs, IntRegs:$Rt),
+ (M2_mpyi IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_M2_mpyui IntRegs:$Rs, IntRegs:$Rt), // Same as M2_mpyi
+ (M2_mpyi IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_M2_mpysmi IntRegs:$Rs, imm:$s9),
+ (M2_mpysmi IntRegs:$Rs, imm:$s9)>;
+def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt),
+ (M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt),
+ (M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, imm:$u5),
+ (S2_asl_i_r IntRegs:$Rs, imm:$u5)>;
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, imm:$u5),
+ (S2_lsr_i_r IntRegs:$Rs, imm:$u5)>;
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, imm:$u5),
+ (S2_asr_i_r IntRegs:$Rs, imm:$u5)>;
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, imm:$u6),
+ (S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>;
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, imm:$u6),
+ (S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>;
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, imm:$u6),
+ (S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>;
+
+def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt),
+ (A2_and IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, imm:$s10),
+ (A2_andir IntRegs:$Rs, imm:$s10)>;
+def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt),
+ (A2_or IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, imm:$s10),
+ (A2_orir IntRegs:$Rs, imm:$s10)>;
+def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt),
+ (A2_xor IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(int_hexagon_A2_sxtb IntRegs:$Rs),
+ (A2_sxtb IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_sxth IntRegs:$Rs),
+ (A2_sxth IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_zxtb IntRegs:$Rs),
+ (A2_zxtb IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_zxth IntRegs:$Rs),
+ (A2_zxth IntRegs:$Rs)>;
// Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32)
def : Pat <(int_hexagon_A2_not I32:$Rs),
@@ -757,16 +109,6 @@ def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
(S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
-// Transfer immediate
-def : Pat <(int_hexagon_A2_tfril I32:$Rs, u16_0ImmPred:$Is),
- (A2_tfril I32:$Rs, u16_0ImmPred:$Is)>;
-def : Pat <(int_hexagon_A2_tfrih I32:$Rs, u16_0ImmPred:$Is),
- (A2_tfrih I32:$Rs, u16_0ImmPred:$Is)>;
-
-// Transfer Register/immediate.
-def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>;
-def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>;
-
def ImmExt64: SDNodeXForm<imm, [{
int64_t V = N->getSExtValue();
return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i64);
@@ -783,49 +125,6 @@ def ImmExt64: SDNodeXForm<imm, [{
def : Pat<(int_hexagon_A2_tfrpi imm:$Is),
(A2_tfrpi (ImmExt64 $Is))>;
-// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32)
-def : Pat<(int_hexagon_A2_tfrp I64:$src),
- (A2_combinew (HiReg I64:$src), (LoReg I64:$src))>;
-
-//*******************************************************************
-// ALU32/PERM
-//*******************************************************************
-// Combine
-def: T_RR_pat<A2_combine_hh, int_hexagon_A2_combine_hh>;
-def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
-def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
-def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
-
-def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32_0ImmPred, s8_0ImmPred>;
-
-// Mux
-def : T_QRR_pat<C2_mux, int_hexagon_C2_mux>;
-def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32_0ImmPred>;
-def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32_0ImmPred>;
-def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32_0ImmPred, s8_0ImmPred>;
-
-// Shift halfword
-def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
-def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>;
-
-// Sign/zero extend
-def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>;
-def : T_R_pat<A2_sxtb, int_hexagon_A2_sxtb>;
-def : T_R_pat<A2_zxth, int_hexagon_A2_zxth>;
-def : T_R_pat<A2_zxtb, int_hexagon_A2_zxtb>;
-
-//*******************************************************************
-// ALU32/PRED
-//*******************************************************************
-// Compare
-def : T_Q_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
-def : T_Q_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
-def : T_Q_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-
-def : T_Q_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32_0ImmPred>;
-def : T_Q_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32_0ImmPred>;
-def : T_Q_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32_0ImmPred>;
-
def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2),
(C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
@@ -839,420 +138,6 @@ def : Pat <(int_hexagon_C2_cmplt I32:$src1, I32:$src2),
def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2),
(C2_tfrpr (C2_cmpgtu I32:$src2, I32:$src1))>;
-//*******************************************************************
-// ALU32/VH
-//*******************************************************************
-// Vector add, subtract, average halfwords
-def: T_RR_pat<A2_svaddh, int_hexagon_A2_svaddh>;
-def: T_RR_pat<A2_svaddhs, int_hexagon_A2_svaddhs>;
-def: T_RR_pat<A2_svadduhs, int_hexagon_A2_svadduhs>;
-
-def: T_RR_pat<A2_svsubh, int_hexagon_A2_svsubh>;
-def: T_RR_pat<A2_svsubhs, int_hexagon_A2_svsubhs>;
-def: T_RR_pat<A2_svsubuhs, int_hexagon_A2_svsubuhs>;
-
-def: T_RR_pat<A2_svavgh, int_hexagon_A2_svavgh>;
-def: T_RR_pat<A2_svavghs, int_hexagon_A2_svavghs>;
-def: T_RR_pat<A2_svnavgh, int_hexagon_A2_svnavgh>;
-
-//*******************************************************************
-// ALU64/ALU
-//*******************************************************************
-def: T_RR_pat<A2_addsat, int_hexagon_A2_addsat>;
-def: T_RR_pat<A2_subsat, int_hexagon_A2_subsat>;
-def: T_PP_pat<A2_addp, int_hexagon_A2_addp>;
-def: T_PP_pat<A2_subp, int_hexagon_A2_subp>;
-
-def: T_PP_pat<A2_andp, int_hexagon_A2_andp>;
-def: T_PP_pat<A2_orp, int_hexagon_A2_orp>;
-def: T_PP_pat<A2_xorp, int_hexagon_A2_xorp>;
-
-def: T_Q_PP_pat<C2_cmpeqp, int_hexagon_C2_cmpeqp>;
-def: T_Q_PP_pat<C2_cmpgtp, int_hexagon_C2_cmpgtp>;
-def: T_Q_PP_pat<C2_cmpgtup, int_hexagon_C2_cmpgtup>;
-
-def: T_PP_pat<S2_parityp, int_hexagon_S2_parityp>;
-def: T_RR_pat<S2_packhl, int_hexagon_S2_packhl>;
-
-//*******************************************************************
-// ALU64/VB
-//*******************************************************************
-// ALU64 - Vector add
-def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddub>;
-def : T_PP_pat <A2_vaddubs, int_hexagon_A2_vaddubs>;
-def : T_PP_pat <A2_vaddh, int_hexagon_A2_vaddh>;
-def : T_PP_pat <A2_vaddhs, int_hexagon_A2_vaddhs>;
-def : T_PP_pat <A2_vadduhs, int_hexagon_A2_vadduhs>;
-def : T_PP_pat <A2_vaddw, int_hexagon_A2_vaddw>;
-def : T_PP_pat <A2_vaddws, int_hexagon_A2_vaddws>;
-
-// ALU64 - Vector average
-def : T_PP_pat <A2_vavgub, int_hexagon_A2_vavgub>;
-def : T_PP_pat <A2_vavgubr, int_hexagon_A2_vavgubr>;
-def : T_PP_pat <A2_vavgh, int_hexagon_A2_vavgh>;
-def : T_PP_pat <A2_vavghr, int_hexagon_A2_vavghr>;
-def : T_PP_pat <A2_vavghcr, int_hexagon_A2_vavghcr>;
-def : T_PP_pat <A2_vavguh, int_hexagon_A2_vavguh>;
-def : T_PP_pat <A2_vavguhr, int_hexagon_A2_vavguhr>;
-
-def : T_PP_pat <A2_vavgw, int_hexagon_A2_vavgw>;
-def : T_PP_pat <A2_vavgwr, int_hexagon_A2_vavgwr>;
-def : T_PP_pat <A2_vavgwcr, int_hexagon_A2_vavgwcr>;
-def : T_PP_pat <A2_vavguw, int_hexagon_A2_vavguw>;
-def : T_PP_pat <A2_vavguwr, int_hexagon_A2_vavguwr>;
-
-// ALU64 - Vector negative average
-def : T_PP_pat <A2_vnavgh, int_hexagon_A2_vnavgh>;
-def : T_PP_pat <A2_vnavghr, int_hexagon_A2_vnavghr>;
-def : T_PP_pat <A2_vnavghcr, int_hexagon_A2_vnavghcr>;
-def : T_PP_pat <A2_vnavgw, int_hexagon_A2_vnavgw>;
-def : T_PP_pat <A2_vnavgwr, int_hexagon_A2_vnavgwr>;
-def : T_PP_pat <A2_vnavgwcr, int_hexagon_A2_vnavgwcr>;
-
-// ALU64 - Vector max
-def : T_PP_pat <A2_vmaxh, int_hexagon_A2_vmaxh>;
-def : T_PP_pat <A2_vmaxw, int_hexagon_A2_vmaxw>;
-def : T_PP_pat <A2_vmaxub, int_hexagon_A2_vmaxub>;
-def : T_PP_pat <A2_vmaxuh, int_hexagon_A2_vmaxuh>;
-def : T_PP_pat <A2_vmaxuw, int_hexagon_A2_vmaxuw>;
-
-// ALU64 - Vector min
-def : T_PP_pat <A2_vminh, int_hexagon_A2_vminh>;
-def : T_PP_pat <A2_vminw, int_hexagon_A2_vminw>;
-def : T_PP_pat <A2_vminub, int_hexagon_A2_vminub>;
-def : T_PP_pat <A2_vminuh, int_hexagon_A2_vminuh>;
-def : T_PP_pat <A2_vminuw, int_hexagon_A2_vminuw>;
-
-// ALU64 - Vector sub
-def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubub>;
-def : T_PP_pat <A2_vsububs, int_hexagon_A2_vsububs>;
-def : T_PP_pat <A2_vsubh, int_hexagon_A2_vsubh>;
-def : T_PP_pat <A2_vsubhs, int_hexagon_A2_vsubhs>;
-def : T_PP_pat <A2_vsubuhs, int_hexagon_A2_vsubuhs>;
-def : T_PP_pat <A2_vsubw, int_hexagon_A2_vsubw>;
-def : T_PP_pat <A2_vsubws, int_hexagon_A2_vsubws>;
-
-// ALU64 - Vector compare bytes
-def : T_Q_PP_pat <A2_vcmpbeq, int_hexagon_A2_vcmpbeq>;
-def : T_Q_PP_pat <A4_vcmpbgt, int_hexagon_A4_vcmpbgt>;
-def : T_Q_PP_pat <A2_vcmpbgtu, int_hexagon_A2_vcmpbgtu>;
-
-// ALU64 - Vector compare halfwords
-def : T_Q_PP_pat <A2_vcmpheq, int_hexagon_A2_vcmpheq>;
-def : T_Q_PP_pat <A2_vcmphgt, int_hexagon_A2_vcmphgt>;
-def : T_Q_PP_pat <A2_vcmphgtu, int_hexagon_A2_vcmphgtu>;
-
-// ALU64 - Vector compare words
-def : T_Q_PP_pat <A2_vcmpweq, int_hexagon_A2_vcmpweq>;
-def : T_Q_PP_pat <A2_vcmpwgt, int_hexagon_A2_vcmpwgt>;
-def : T_Q_PP_pat <A2_vcmpwgtu, int_hexagon_A2_vcmpwgtu>;
-
-// ALU64 / VB / Vector mux.
-def : T_QPP_pat <C2_vmux, int_hexagon_C2_vmux>;
-
-// MPY - Multiply and use full result
-// Rdd = mpy[u](Rs, Rt)
-def : T_RR_pat <M2_dpmpyss_s0, int_hexagon_M2_dpmpyss_s0>;
-def : T_RR_pat <M2_dpmpyuu_s0, int_hexagon_M2_dpmpyuu_s0>;
-
-// Complex multiply real or imaginary
-def : T_RR_pat <M2_cmpyi_s0, int_hexagon_M2_cmpyi_s0>;
-def : T_RR_pat <M2_cmpyr_s0, int_hexagon_M2_cmpyr_s0>;
-
-// Complex multiply
-def : T_RR_pat <M2_cmpys_s0, int_hexagon_M2_cmpys_s0>;
-def : T_RR_pat <M2_cmpysc_s0, int_hexagon_M2_cmpysc_s0>;
-def : T_RR_pat <M2_cmpys_s1, int_hexagon_M2_cmpys_s1>;
-def : T_RR_pat <M2_cmpysc_s1, int_hexagon_M2_cmpysc_s1>;
-
-// Vector multiply halfwords
-// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
-def : T_RR_pat <M2_vmpy2s_s0, int_hexagon_M2_vmpy2s_s0>;
-def : T_RR_pat <M2_vmpy2s_s1, int_hexagon_M2_vmpy2s_s1>;
-
-// Rxx[+-]= mpy[u](Rs,Rt)
-def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>;
-def : T_PRR_pat <M2_dpmpyss_nac_s0, int_hexagon_M2_dpmpyss_nac_s0>;
-def : T_PRR_pat <M2_dpmpyuu_acc_s0, int_hexagon_M2_dpmpyuu_acc_s0>;
-def : T_PRR_pat <M2_dpmpyuu_nac_s0, int_hexagon_M2_dpmpyuu_nac_s0>;
-
-// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
-def : T_PRR_pat <M2_cmacs_s0, int_hexagon_M2_cmacs_s0>;
-def : T_PRR_pat <M2_cnacs_s0, int_hexagon_M2_cnacs_s0>;
-def : T_PRR_pat <M2_cmacs_s1, int_hexagon_M2_cmacs_s1>;
-def : T_PRR_pat <M2_cnacs_s1, int_hexagon_M2_cnacs_s1>;
-
-// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
-def : T_PRR_pat <M2_cmacsc_s0, int_hexagon_M2_cmacsc_s0>;
-def : T_PRR_pat <M2_cnacsc_s0, int_hexagon_M2_cnacsc_s0>;
-def : T_PRR_pat <M2_cmacsc_s1, int_hexagon_M2_cmacsc_s1>;
-def : T_PRR_pat <M2_cnacsc_s1, int_hexagon_M2_cnacsc_s1>;
-
-// Rxx+=cmpy[ir](Rs,Rt)
-def : T_PRR_pat <M2_cmaci_s0, int_hexagon_M2_cmaci_s0>;
-def : T_PRR_pat <M2_cmacr_s0, int_hexagon_M2_cmacr_s0>;
-
-// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
-def : T_PRR_pat <M2_vmac2, int_hexagon_M2_vmac2>;
-def : T_PRR_pat <M2_vmac2s_s0, int_hexagon_M2_vmac2s_s0>;
-def : T_PRR_pat <M2_vmac2s_s1, int_hexagon_M2_vmac2s_s1>;
-
-//*******************************************************************
-// CR
-//*******************************************************************
-def: T_Q_Q_pat<C2_not, int_hexagon_C2_not>;
-def: T_Q_Q_pat<C2_all8, int_hexagon_C2_all8>;
-def: T_Q_Q_pat<C2_any8, int_hexagon_C2_any8>;
-def: T_Q_Q_pat<C2_pxfer_map, int_hexagon_C2_pxfer_map>;
-
-def: T_Q_QQ_pat<C2_and, int_hexagon_C2_and>;
-def: T_Q_QQ_pat<C2_andn, int_hexagon_C2_andn>;
-def: T_Q_QQ_pat<C2_or, int_hexagon_C2_or>;
-def: T_Q_QQ_pat<C2_orn, int_hexagon_C2_orn>;
-def: T_Q_QQ_pat<C2_xor, int_hexagon_C2_xor>;
-
-// Multiply 32x32 and use lower result
-def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>;
-def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>;
-def : T_RRR_pat <M2_maci, int_hexagon_M2_maci>;
-
-// Subtract and accumulate
-def : T_RRR_pat <M2_subacc, int_hexagon_M2_subacc>;
-
-// Add and accumulate
-def : T_RRR_pat <M2_acci, int_hexagon_M2_acci>;
-def : T_RRR_pat <M2_nacci, int_hexagon_M2_nacci>;
-def : T_RRI_pat <M2_accii, int_hexagon_M2_accii>;
-def : T_RRI_pat <M2_naccii, int_hexagon_M2_naccii>;
-
-// XOR and XOR with destination
-def : T_RRR_pat <M2_xor_xacc, int_hexagon_M2_xor_xacc>;
-
-// Vector dual multiply with round and pack
-def : T_PP_pat <M2_vdmpyrs_s0, int_hexagon_M2_vdmpyrs_s0>;
-def : T_PP_pat <M2_vdmpyrs_s1, int_hexagon_M2_vdmpyrs_s1>;
-
-// Vector multiply halfwords with round and pack
-def : T_RR_pat <M2_vmpy2s_s0pack, int_hexagon_M2_vmpy2s_s0pack>;
-def : T_RR_pat <M2_vmpy2s_s1pack, int_hexagon_M2_vmpy2s_s1pack>;
-
-// Multiply and use lower result
-def : T_RR_pat <M2_mpyi, int_hexagon_M2_mpyi>;
-def : T_RI_pat <M2_mpysmi, int_hexagon_M2_mpysmi>;
-
-// Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32)
-def : T_RR_pat <M2_mpyi, int_hexagon_M2_mpyui>;
-
-// Multiply and use upper result
-def : T_RR_pat <M2_mpy_up, int_hexagon_M2_mpy_up>;
-def : T_RR_pat <M2_mpyu_up, int_hexagon_M2_mpyu_up>;
-def : T_RR_pat <M2_hmmpyh_rs1, int_hexagon_M2_hmmpyh_rs1>;
-def : T_RR_pat <M2_hmmpyl_rs1, int_hexagon_M2_hmmpyl_rs1>;
-def : T_RR_pat <M2_dpmpyss_rnd_s0, int_hexagon_M2_dpmpyss_rnd_s0>;
-
-// Complex multiply with round and pack
-// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat
-def : T_RR_pat <M2_cmpyrs_s0, int_hexagon_M2_cmpyrs_s0>;
-def : T_RR_pat <M2_cmpyrs_s1, int_hexagon_M2_cmpyrs_s1>;
-def : T_RR_pat <M2_cmpyrsc_s0, int_hexagon_M2_cmpyrsc_s0>;
-def : T_RR_pat <M2_cmpyrsc_s1, int_hexagon_M2_cmpyrsc_s1>;
-
-//*******************************************************************
-// STYPE/ALU
-//*******************************************************************
-def : T_P_pat <A2_absp, int_hexagon_A2_absp>;
-def : T_P_pat <A2_negp, int_hexagon_A2_negp>;
-def : T_P_pat <A2_notp, int_hexagon_A2_notp>;
-
-//*******************************************************************
-// STYPE/BIT
-//*******************************************************************
-
-// Count leading/trailing
-def: T_R_pat<S2_cl0, int_hexagon_S2_cl0>;
-def: T_P_pat<S2_cl0p, int_hexagon_S2_cl0p>;
-def: T_R_pat<S2_cl1, int_hexagon_S2_cl1>;
-def: T_P_pat<S2_cl1p, int_hexagon_S2_cl1p>;
-def: T_R_pat<S2_clb, int_hexagon_S2_clb>;
-def: T_P_pat<S2_clbp, int_hexagon_S2_clbp>;
-def: T_R_pat<S2_clbnorm, int_hexagon_S2_clbnorm>;
-def: T_R_pat<S2_ct0, int_hexagon_S2_ct0>;
-def: T_R_pat<S2_ct1, int_hexagon_S2_ct1>;
-
-// Compare bit mask
-def: T_RR_pat<C2_bitsclr, int_hexagon_C2_bitsclr>;
-def: T_RI_pat<C2_bitsclri, int_hexagon_C2_bitsclri>;
-def: T_RR_pat<C2_bitsset, int_hexagon_C2_bitsset>;
-
-// Vector shuffle
-def : T_PP_pat <S2_shuffeb, int_hexagon_S2_shuffeb>;
-def : T_PP_pat <S2_shuffob, int_hexagon_S2_shuffob>;
-def : T_PP_pat <S2_shuffeh, int_hexagon_S2_shuffeh>;
-def : T_PP_pat <S2_shuffoh, int_hexagon_S2_shuffoh>;
-
-// Vector truncate
-def : T_PP_pat <S2_vtrunewh, int_hexagon_S2_vtrunewh>;
-def : T_PP_pat <S2_vtrunowh, int_hexagon_S2_vtrunowh>;
-
-// Linear feedback-shift Iteration.
-def : T_PP_pat <S2_lfsp, int_hexagon_S2_lfsp>;
-
-// Vector align
-// Need custom lowering
-def : T_PPQ_pat <S2_valignrb, int_hexagon_S2_valignrb>;
-def : T_PPI_pat <S2_valignib, int_hexagon_S2_valignib>;
-
-// Vector splice
-def : T_PPQ_pat <S2_vsplicerb, int_hexagon_S2_vsplicerb>;
-def : T_PPI_pat <S2_vspliceib, int_hexagon_S2_vspliceib>;
-
-// Shift by immediate and add
-def : T_RRI_pat<S2_addasl_rrri, int_hexagon_S2_addasl_rrri>;
-
-// Extract bitfield
-def : T_PII_pat<S2_extractup, int_hexagon_S2_extractup>;
-def : T_RII_pat<S2_extractu, int_hexagon_S2_extractu>;
-def : T_RP_pat <S2_extractu_rp, int_hexagon_S2_extractu_rp>;
-def : T_PP_pat <S2_extractup_rp, int_hexagon_S2_extractup_rp>;
-
-// Insert bitfield
-def : Pat <(int_hexagon_S2_insert_rp I32:$src1, I32:$src2, I64:$src3),
- (S2_insert_rp I32:$src1, I32:$src2, I64:$src3)>;
-
-def : Pat<(i64 (int_hexagon_S2_insertp_rp I64:$src1, I64:$src2, I64:$src3)),
- (i64 (S2_insertp_rp I64:$src1, I64:$src2, I64:$src3))>;
-
-def : Pat<(int_hexagon_S2_insert I32:$src1, I32:$src2,
- u5_0ImmPred:$src3, u5_0ImmPred:$src4),
- (S2_insert I32:$src1, I32:$src2,
- u5_0ImmPred:$src3, u5_0ImmPred:$src4)>;
-
-def : Pat<(i64 (int_hexagon_S2_insertp I64:$src1, I64:$src2,
- u6_0ImmPred:$src3, u6_0ImmPred:$src4)),
- (i64 (S2_insertp I64:$src1, I64:$src2,
- u6_0ImmPred:$src3, u6_0ImmPred:$src4))>;
-
-// Innterleave/deinterleave
-def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>;
-def : T_P_pat <S2_deinterleave, int_hexagon_S2_deinterleave>;
-
-// Set/Clear/Toggle Bit
-def: T_RI_pat<S2_setbit_i, int_hexagon_S2_setbit_i>;
-def: T_RI_pat<S2_clrbit_i, int_hexagon_S2_clrbit_i>;
-def: T_RI_pat<S2_togglebit_i, int_hexagon_S2_togglebit_i>;
-
-def: T_RR_pat<S2_setbit_r, int_hexagon_S2_setbit_r>;
-def: T_RR_pat<S2_clrbit_r, int_hexagon_S2_clrbit_r>;
-def: T_RR_pat<S2_togglebit_r, int_hexagon_S2_togglebit_r>;
-
-// Test Bit
-def: T_Q_RI_pat<S2_tstbit_i, int_hexagon_S2_tstbit_i>;
-def: T_Q_RR_pat<S2_tstbit_r, int_hexagon_S2_tstbit_r>;
-
-//*******************************************************************
-// STYPE/COMPLEX
-//*******************************************************************
-// Vector Complex conjugate
-def : T_P_pat <A2_vconj, int_hexagon_A2_vconj>;
-
-// Vector Complex rotate
-def : T_PR_pat <S2_vcrotate, int_hexagon_S2_vcrotate>;
-
-//*******************************************************************
-// STYPE/PERM
-//*******************************************************************
-
-// Vector saturate without pack
-def : T_P_pat <S2_vsathb_nopack, int_hexagon_S2_vsathb_nopack>;
-def : T_P_pat <S2_vsathub_nopack, int_hexagon_S2_vsathub_nopack>;
-def : T_P_pat <S2_vsatwh_nopack, int_hexagon_S2_vsatwh_nopack>;
-def : T_P_pat <S2_vsatwuh_nopack, int_hexagon_S2_vsatwuh_nopack>;
-
-//*******************************************************************
-// STYPE/PRED
-//*******************************************************************
-
-// Predicate transfer
-def: Pat<(i32 (int_hexagon_C2_tfrpr I32:$Rs)),
- (i32 (C2_tfrpr (C2_tfrrp I32:$Rs)))>;
-def: Pat<(i32 (int_hexagon_C2_tfrrp I32:$Rs)),
- (i32 (C2_tfrpr (C2_tfrrp I32:$Rs)))>;
-
-// Mask generate from predicate
-def: Pat<(i64 (int_hexagon_C2_mask I32:$Rs)),
- (i64 (C2_mask (C2_tfrrp I32:$Rs)))>;
-
-// Viterbi pack even and odd predicate bits
-def: T_QQ_pat<C2_vitpack, int_hexagon_C2_vitpack>;
-
-//*******************************************************************
-// STYPE/SHIFT
-//*******************************************************************
-
-def : T_PI_pat <S2_asr_i_p, int_hexagon_S2_asr_i_p>;
-def : T_PI_pat <S2_lsr_i_p, int_hexagon_S2_lsr_i_p>;
-def : T_PI_pat <S2_asl_i_p, int_hexagon_S2_asl_i_p>;
-
-def : T_PR_pat <S2_asr_r_p, int_hexagon_S2_asr_r_p>;
-def : T_PR_pat <S2_lsr_r_p, int_hexagon_S2_lsr_r_p>;
-def : T_PR_pat <S2_asl_r_p, int_hexagon_S2_asl_r_p>;
-def : T_PR_pat <S2_lsl_r_p, int_hexagon_S2_lsl_r_p>;
-
-def : T_RR_pat <S2_asr_r_r, int_hexagon_S2_asr_r_r>;
-def : T_RR_pat <S2_lsr_r_r, int_hexagon_S2_lsr_r_r>;
-def : T_RR_pat <S2_asl_r_r, int_hexagon_S2_asl_r_r>;
-def : T_RR_pat <S2_lsl_r_r, int_hexagon_S2_lsl_r_r>;
-
-def : T_RR_pat <S2_asr_r_r_sat, int_hexagon_S2_asr_r_r_sat>;
-def : T_RR_pat <S2_asl_r_r_sat, int_hexagon_S2_asl_r_r_sat>;
-
-def : T_R_pat <S2_vsxtbh, int_hexagon_S2_vsxtbh>;
-def : T_R_pat <S2_vzxtbh, int_hexagon_S2_vzxtbh>;
-def : T_R_pat <S2_vsxthw, int_hexagon_S2_vsxthw>;
-def : T_R_pat <S2_vzxthw, int_hexagon_S2_vzxthw>;
-def : T_R_pat <S2_vsplatrh, int_hexagon_S2_vsplatrh>;
-def : T_R_pat <A2_sxtw, int_hexagon_A2_sxtw>;
-
-// Vector saturate and pack
-def : T_R_pat <S2_svsathb, int_hexagon_S2_svsathb>;
-def : T_R_pat <S2_svsathub, int_hexagon_S2_svsathub>;
-def : T_P_pat <S2_vsathub, int_hexagon_S2_vsathub>;
-def : T_P_pat <S2_vsatwh, int_hexagon_S2_vsatwh>;
-def : T_P_pat <S2_vsatwuh, int_hexagon_S2_vsatwuh>;
-def : T_P_pat <S2_vsathb, int_hexagon_S2_vsathb>;
-
-def : T_P_pat <S2_vtrunohb, int_hexagon_S2_vtrunohb>;
-def : T_P_pat <S2_vtrunehb, int_hexagon_S2_vtrunehb>;
-def : T_P_pat <S2_vrndpackwh, int_hexagon_S2_vrndpackwh>;
-def : T_P_pat <S2_vrndpackwhs, int_hexagon_S2_vrndpackwhs>;
-def : T_R_pat <S2_brev, int_hexagon_S2_brev>;
-def : T_R_pat <S2_vsplatrb, int_hexagon_S2_vsplatrb>;
-
-def : T_R_pat <A2_abs, int_hexagon_A2_abs>;
-def : T_R_pat <A2_abssat, int_hexagon_A2_abssat>;
-def : T_R_pat <A2_negsat, int_hexagon_A2_negsat>;
-
-def : T_R_pat <A2_swiz, int_hexagon_A2_swiz>;
-
-def : T_P_pat <A2_sat, int_hexagon_A2_sat>;
-def : T_R_pat <A2_sath, int_hexagon_A2_sath>;
-def : T_R_pat <A2_satuh, int_hexagon_A2_satuh>;
-def : T_R_pat <A2_satub, int_hexagon_A2_satub>;
-def : T_R_pat <A2_satb, int_hexagon_A2_satb>;
-
-// Vector arithmetic shift right by immediate with truncate and pack.
-def : T_PI_pat<S2_asr_i_svw_trun, int_hexagon_S2_asr_i_svw_trun>;
-
-def : T_RI_pat <S2_asr_i_r, int_hexagon_S2_asr_i_r>;
-def : T_RI_pat <S2_lsr_i_r, int_hexagon_S2_lsr_i_r>;
-def : T_RI_pat <S2_asl_i_r, int_hexagon_S2_asl_i_r>;
-def : T_RI_pat <S2_asr_i_r_rnd, int_hexagon_S2_asr_i_r_rnd>;
-def : T_RI_pat <S2_asr_i_r_rnd_goodsyntax,
- int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
-
-// Shift left by immediate with saturation.
-def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>;
-
//===----------------------------------------------------------------------===//
// Template 'def pat' to map tableidx[bhwd] intrinsics to :raw instructions.
//===----------------------------------------------------------------------===//
@@ -1277,11 +162,8 @@ def SDEC3 : SDNodeXForm<imm, [{
// values from the 4th input operand. Please note that subtraction is not
// needed for int_hexagon_S2_tableidxb_goodsyntax.
-def : Pat <(int_hexagon_S2_tableidxb_goodsyntax I32:$src1, I32:$src2,
- u4_0ImmPred:$src3, u5_0ImmPred:$src4),
- (S2_tableidxb I32:$src1, I32:$src2,
- u4_0ImmPred:$src3, u5_0ImmPred:$src4)>;
-
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxb_goodsyntax, S2_tableidxb,
+ IdImm>;
def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh,
SDEC1>;
def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
@@ -1289,52 +171,6 @@ def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd,
SDEC3>;
-//*******************************************************************
-// STYPE/VH
-//*******************************************************************
-
-// Vector absolute value halfwords with and without saturation
-// Rdd64=vabsh(Rss64)[:sat]
-def : T_P_pat <A2_vabsh, int_hexagon_A2_vabsh>;
-def : T_P_pat <A2_vabshsat, int_hexagon_A2_vabshsat>;
-
-// Vector shift halfwords by immediate
-// Rdd64=[vaslh/vasrh/vlsrh](Rss64,u4)
-def : T_PI_pat <S2_asr_i_vh, int_hexagon_S2_asr_i_vh>;
-def : T_PI_pat <S2_lsr_i_vh, int_hexagon_S2_lsr_i_vh>;
-def : T_PI_pat <S2_asl_i_vh, int_hexagon_S2_asl_i_vh>;
-
-// Vector shift halfwords by register
-// Rdd64=[vaslw/vasrw/vlslw/vlsrw](Rss64,Rt32)
-def : T_PR_pat <S2_asr_r_vh, int_hexagon_S2_asr_r_vh>;
-def : T_PR_pat <S2_lsr_r_vh, int_hexagon_S2_lsr_r_vh>;
-def : T_PR_pat <S2_asl_r_vh, int_hexagon_S2_asl_r_vh>;
-def : T_PR_pat <S2_lsl_r_vh, int_hexagon_S2_lsl_r_vh>;
-
-//*******************************************************************
-// STYPE/VW
-//*******************************************************************
-
-// Vector absolute value words with and without saturation
-def : T_P_pat <A2_vabsw, int_hexagon_A2_vabsw>;
-def : T_P_pat <A2_vabswsat, int_hexagon_A2_vabswsat>;
-
-// Vector shift words by immediate.
-// Rdd64=[vasrw/vlsrw|vaslw](Rss64,u5)
-def : T_PI_pat <S2_asr_i_vw, int_hexagon_S2_asr_i_vw>;
-def : T_PI_pat <S2_lsr_i_vw, int_hexagon_S2_lsr_i_vw>;
-def : T_PI_pat <S2_asl_i_vw, int_hexagon_S2_asl_i_vw>;
-
-// Vector shift words by register.
-// Rdd64=[vasrw/vlsrw|vaslw|vlslw](Rss64,Rt32)
-def : T_PR_pat <S2_asr_r_vw, int_hexagon_S2_asr_r_vw>;
-def : T_PR_pat <S2_lsr_r_vw, int_hexagon_S2_lsr_r_vw>;
-def : T_PR_pat <S2_asl_r_vw, int_hexagon_S2_asl_r_vw>;
-def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>;
-
-// Vector shift words with truncate and pack
-def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>;
-
// Load/store locked.
def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>;
def : T_R_pat<L4_loadd_locked, int_hexagon_L4_loadd_locked>;
@@ -1370,10 +206,13 @@ def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
- (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>;
+ (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>,
+ Requires<[UseHVX]>;
+
def : Pat<(!cast<Intrinsic>(IntID#"_128B") HvxQR:$src1, IntRegs:$src2,
HvxVR:$src3),
- (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>;
+ (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>,
+ Requires<[UseHVX]>;
}
defm : MaskedStore <V6_vS32b_qpred_ai, int_hexagon_V6_vmaskedstoreq>;
@@ -1398,7 +237,241 @@ def: T_R_pat<Y2_dczeroa, int_hexagon_Y2_dczeroa>;
def: T_RR_pat<Y4_l2fetch, int_hexagon_Y4_l2fetch>;
def: T_RP_pat<Y5_l2fetch, int_hexagon_Y5_l2fetch>;
-include "HexagonIntrinsicsV3.td"
-include "HexagonIntrinsicsV4.td"
-include "HexagonIntrinsicsV5.td"
-include "HexagonIntrinsicsV60.td"
+//
+// Patterns for optimizing code generation for HVX.
+
+def u3_64_ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)(64 - N->getSExtValue());
+ return isUInt<3>(v);
+}]>;
+
+def u3_128_ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)(128 - N->getSExtValue());
+ return isUInt<3>(v);
+}]>;
+
+def SUB_64_VAL : SDNodeXForm<imm, [{
+ int32_t Imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(64 - Imm, SDLoc(N), MVT::i32);
+}]>;
+
+def SUB_128_VAL : SDNodeXForm<imm, [{
+ int32_t Imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(128 - Imm, SDLoc(N), MVT::i32);
+}]>;
+
+let AddedComplexity = 100 in {
+def : Pat <(v16i32 (int_hexagon_V6_lo (v32i32 HvxWR:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 HvxWR:$src1), vsub_lo))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v16i32 (int_hexagon_V6_hi (v32i32 HvxWR:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 HvxWR:$src1), vsub_hi))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i32 (int_hexagon_V6_lo_128B (v64i32 HvxWR:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 HvxWR:$src1), vsub_lo))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 HvxWR:$src1), vsub_hi))>,
+ Requires<[UseHVX]>;
+}
+
+def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
+ (v512i1 (V6_vandvrt (v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
+ (v512i1 (V6_vandvrt (v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v512i1 (bitconvert (v64i8 HvxVR:$src1))),
+ (v512i1 (V6_vandvrt (v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
+ (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
+ (v32i16 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v64i8 (bitconvert (v512i1 HvxQR:$src1))),
+ (v64i8 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
+ (v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
+ (v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))),
+ (v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
+ (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
+ (v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))),
+ (v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+let AddedComplexity = 140 in {
+def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai IntRegs:$addr, 0,
+ (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
+ (v512i1 (V6_vandvrt
+ (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai IntRegs:$addr, 0,
+ (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
+ (v1024i1 (V6_vandvrt
+ (v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+}
+
+def: Pat<(v64i16 (trunc v64i32:$Vdd)),
+ (v64i16 (V6_vpackwh_sat
+ (v32i32 (V6_hi HvxWR:$Vdd)),
+ (v32i32 (V6_lo HvxWR:$Vdd))))>,
+ Requires<[UseHVX]>;
+
+def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV55]>;
+
+multiclass T_VI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, u3_0ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1, u3_0ImmPred:$src2)>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, u3_0ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1, u3_0ImmPred:$src2)>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VI_inv_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, u3_64_ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1,
+ (SUB_64_VAL u3_64_ImmPred:$src2))>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, u3_128_ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1, (SUB_128_VAL u3_128_ImmPred:$src2))>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VVI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, HvxVR:$src2,
+ u3_0ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ u3_0ImmPred:$src3)>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VVI_inv_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, HvxVR:$src2, u3_64_ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ (SUB_64_VAL u3_64_ImmPred:$src3))>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, HvxVR:$src2,
+ u3_128_ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ (SUB_128_VAL u3_128_ImmPred:$src3))>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, HvxVR:$src2,
+ IntRegs:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVX]>;
+}
+
+defm : T_VI_pat <V6_valignbi, int_hexagon_V6_vror>;
+defm : T_VI_inv_pat <V6_vlalignbi, int_hexagon_V6_vror>;
+
+defm : T_VVI_pat <V6_valignbi, int_hexagon_V6_valignb>;
+defm : T_VVI_inv_pat <V6_vlalignbi, int_hexagon_V6_valignbi>;
+defm : T_VVI_inv_pat <V6_vlalignbi, int_hexagon_V6_valignb>;
+defm : T_VVR_pat <V6_valignb, int_hexagon_V6_valignbi>;
+defm : T_VVI_pat <V6_vlalignbi, int_hexagon_V6_vlalignb>;
+defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignbi>;
+defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignb>;
+defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignbi>;
+
+def: Pat<(int_hexagon_V6_vd0),
+ (V6_vd0)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vd0_128B ),
+ (V6_vd0)>, Requires<[HasV60, UseHVX128B]>;
+
+def: Pat<(int_hexagon_V6_vdd0),
+ (V6_vdd0)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdd0_128B),
+ (V6_vdd0)>, Requires<[HasV65, UseHVX128B]>;
+
+def: Pat<(int_hexagon_V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5),
+ (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5),
+ (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+
+include "HexagonDepMapAsm2Intrin.td"
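Note on the vror-to-vlalign patterns added above: they hinge on a small immediate rewrite. u3_64_ImmPred (and its 128-byte twin) accept a rotate amount N only when 64 - N (respectively 128 - N) fits in an unsigned 3-bit field, and SUB_64_VAL / SUB_128_VAL then emit that inverted value as the align amount. Below is a minimal, self-contained C++ sketch of the same check and rewrite; the function names are illustrative and not part of the patch.

#include <cassert>
#include <cstdint>

// Mirrors u3_64_ImmPred / u3_128_ImmPred: accept Imm only when
// (Bytes - Imm) fits in an unsigned 3-bit field, i.e. isUInt<3>(Bytes - Imm).
static bool fitsInvertedU3(int64_t Imm, int64_t Bytes) {
  int64_t V = Bytes - Imm;
  return V >= 0 && V <= 7;
}

// Mirrors SUB_64_VAL / SUB_128_VAL: the immediate that is actually emitted.
static int32_t invertImm(int32_t Imm, int32_t Bytes) { return Bytes - Imm; }

int main() {
  // A rotate amount of 60 on a 64-byte vector is rewritten to an align by 4.
  assert(fitsInvertedU3(60, 64) && invertImm(60, 64) == 4);
  // 64 - 3 = 61 does not fit in 3 bits, so the inverted pattern does not apply.
  assert(!fitsInvertedU3(3, 64));
  return 0;
}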
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
deleted file mode 100644
index 6152cb098825..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
+++ /dev/null
@@ -1,27 +0,0 @@
-//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-// Vector reduce complex multiply real or imaginary
-def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
-def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
-def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
-
-// Vector reduce add unsigned halfwords
-def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
-
-def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
-def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
-def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
-def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
-def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
-def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
deleted file mode 100644
index 2affe531515d..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ /dev/null
@@ -1,305 +0,0 @@
-//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This is populated based on the following specs:
-// Hexagon V4 Architecture Extensions
-// Application-Level Specification
-// 80-V9418-12 Rev. A
-// June 15, 2010
-
-// Vector reduce multiply word by signed half (32x16)
-//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
-def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
-def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
-
-//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
-def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
-
-//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
-def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
-def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
-
-//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
-def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
-
-// Vector multiply halfwords, signed by unsigned
-// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
-def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
-def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
-
-// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
-def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
-def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
-
-// Vector polynomial multiply halfwords
-// Rdd=vpmpyh(Rs,Rt)
-def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
-// Rxx[^]=vpmpyh(Rs,Rt)
-def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
-
-// Polynomial multiply words
-// Rdd=pmpyw(Rs,Rt)
-def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
-// Rxx^=pmpyw(Rs,Rt)
-def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
-
-//Rxx^=asr(Rss,Rt)
-def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
-//Rxx^=asl(Rss,Rt)
-def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
-//Rxx^=lsr(Rss,Rt)
-def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
-//Rxx^=lsl(Rss,Rt)
-def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
-
-// Multiply and use upper result
-def : T_RR_pat <M2_mpysu_up, int_hexagon_M2_mpysu_up>;
-def : T_RR_pat <M2_mpy_up_s1, int_hexagon_M2_mpy_up_s1>;
-def : T_RR_pat <M2_hmmpyh_s1, int_hexagon_M2_hmmpyh_s1>;
-def : T_RR_pat <M2_hmmpyl_s1, int_hexagon_M2_hmmpyl_s1>;
-def : T_RR_pat <M2_mpy_up_s1_sat, int_hexagon_M2_mpy_up_s1_sat>;
-
-def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddb_map>;
-def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubb_map>;
-
-// Vector reduce add unsigned halfwords
-def : T_PP_pat <M2_vraddh, int_hexagon_M2_vraddh>;
-
-def: T_P_pat<S2_brevp, int_hexagon_S2_brevp>;
-def: T_P_pat<S2_ct0p, int_hexagon_S2_ct0p>;
-def: T_P_pat<S2_ct1p, int_hexagon_S2_ct1p>;
-
-def: T_Q_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
-def: T_Q_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
-def: T_Q_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
-
-def : T_Q_PI_pat<A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi>;
-def : T_Q_PI_pat<A4_vcmpbgti, int_hexagon_A4_vcmpbgti>;
-def : T_Q_PI_pat<A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui>;
-def : T_Q_PI_pat<A4_vcmpheqi, int_hexagon_A4_vcmpheqi>;
-def : T_Q_PI_pat<A4_vcmphgti, int_hexagon_A4_vcmphgti>;
-def : T_Q_PI_pat<A4_vcmphgtui, int_hexagon_A4_vcmphgtui>;
-def : T_Q_PI_pat<A4_vcmpweqi, int_hexagon_A4_vcmpweqi>;
-def : T_Q_PI_pat<A4_vcmpwgti, int_hexagon_A4_vcmpwgti>;
-def : T_Q_PI_pat<A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui>;
-def : T_Q_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
-
-def : T_Q_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
-def : T_Q_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
-def : T_Q_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
-def : T_Q_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
-def : T_Q_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
-def : T_Q_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
-
-def : T_Q_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
-def : T_Q_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
-def : T_Q_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
-
-def : T_Q_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
-def : T_Q_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
-def : T_Q_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
-
-def : T_Q_RP_pat<A4_boundscheck, int_hexagon_A4_boundscheck>;
-def : T_Q_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
-
-def : T_RRR_pat <M4_mpyrr_addr, int_hexagon_M4_mpyrr_addr>;
-def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
-def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
-def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
-def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
-def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
-def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
-
-// Complex multiply 32x16
-def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
-def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
-
-def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
-def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
-
-def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
-def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
-
-// Complex add/sub halfwords/words
-def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
-def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
-def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
-def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
-
-def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
-def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
-
-// Extract bitfield
-def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
-def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
-def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
-def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
-
-// Vector conditional negate
-// Rdd=vcnegh(Rss,Rt)
-def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
-
-// Shift an immediate left by register amount
-def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
-
-// Vector reduce maximum halfwords
-def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
-def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
-
-// Vector reduce maximum words
-def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
-def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
-
-// Vector reduce minimum halfwords
-def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
-def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
-
-// Vector reduce minimum words
-def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
-def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
-
-// Rotate and reduce bytes
-def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
- u2_0ImmPred:$src3),
- (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
-
-// Rotate and reduce bytes with accumulation
-// Rxx+=vrcrotate(Rss,Rt,#u2)
-def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3, u2_0ImmPred:$src4),
- (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3, u2_0ImmPred:$src4)>;
-
-// Vector conditional negate
-def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
-
-// Logical xor with xor accumulation
-def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
-
-// ALU64 - Vector min/max byte
-def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
-def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
-
-// Shift and add/sub/and/or
-def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
-def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
-def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
-def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
-def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
-def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
-def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
-def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
-
-// Split bitfield
-def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
-def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
-
-def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
-
-def: T_Q_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
-def: T_Q_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
-
-def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
-def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
-def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
-
-//*******************************************************************
-// ALU32/ALU
-//*******************************************************************
-
-// ALU32 / ALU / Logical Operations.
-def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
-def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
-
-//*******************************************************************
-// ALU32/PERM
-//*******************************************************************
-
-// Combine Words Into Doublewords.
-def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
-def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
-
-//*******************************************************************
-// ALU32/PRED
-//*******************************************************************
-
-// Compare
-def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
-def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
-def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
-
-// Compare To General Register.
-def: T_Q_RR_pat<C4_cmpneq, int_hexagon_C4_cmpneq>;
-def: T_Q_RR_pat<C4_cmplte, int_hexagon_C4_cmplte>;
-def: T_Q_RR_pat<C4_cmplteu, int_hexagon_C4_cmplteu>;
-
-def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
-def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
-
-def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
-def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
-
-//*******************************************************************
-// CR
-//*******************************************************************
-
-// CR / Logical Operations On Predicates.
-def: T_Q_QQQ_pat<C4_and_and, int_hexagon_C4_and_and>;
-def: T_Q_QQQ_pat<C4_and_andn, int_hexagon_C4_and_andn>;
-def: T_Q_QQQ_pat<C4_and_or, int_hexagon_C4_and_or>;
-def: T_Q_QQQ_pat<C4_and_orn, int_hexagon_C4_and_orn>;
-def: T_Q_QQQ_pat<C4_or_and, int_hexagon_C4_or_and>;
-def: T_Q_QQQ_pat<C4_or_andn, int_hexagon_C4_or_andn>;
-def: T_Q_QQQ_pat<C4_or_or, int_hexagon_C4_or_or>;
-def: T_Q_QQQ_pat<C4_or_orn, int_hexagon_C4_or_orn>;
-
-//*******************************************************************
-// XTYPE/ALU
-//*******************************************************************
-
-// Add And Accumulate.
-
-def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
-def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
-
-
-// XTYPE / ALU / Logical-logical Words.
-def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
-def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
-def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
-def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
-def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
-def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
-def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
-def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
-def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
-def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
-def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
-
-def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
-def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
-def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
-
-// Modulo wrap.
-def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
-
-// Arithmetic/Convergent round
-// Rd=[cround|round](Rs,Rt)[:sat]
-// Rd=[cround|round](Rs,#u5)[:sat]
-def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
-def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
-
-def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
-def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
-
-def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
-def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
-
-def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
index 29f67cffcf89..a852394f2160 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -7,9 +7,314 @@
//
//===----------------------------------------------------------------------===//
+def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
+def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
+def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
+
+def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
+def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
+def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
+def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
+def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
+def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
+
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
+def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
+def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
+
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
+def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
+def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
+// Rxx[^]=vpmpyh(Rs,Rt)
+def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
+// Rxx^=pmpyw(Rs,Rt)
+def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
+
+//Rxx^=asr(Rss,Rt)
+def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
+//Rxx^=asl(Rss,Rt)
+def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
+//Rxx^=lsr(Rss,Rt)
+def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
+//Rxx^=lsl(Rss,Rt)
+def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
+
+// Multiply and use upper result
+def : T_RR_pat <M2_mpysu_up, int_hexagon_M2_mpysu_up>;
+def : T_RR_pat <M2_mpy_up_s1, int_hexagon_M2_mpy_up_s1>;
+def : T_RR_pat <M2_hmmpyh_s1, int_hexagon_M2_hmmpyh_s1>;
+def : T_RR_pat <M2_hmmpyl_s1, int_hexagon_M2_hmmpyl_s1>;
+def : T_RR_pat <M2_mpy_up_s1_sat, int_hexagon_M2_mpy_up_s1_sat>;
+
+def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddb_map>;
+def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubb_map>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat <M2_vraddh, int_hexagon_M2_vraddh>;
+
+def: T_P_pat<S2_brevp, int_hexagon_S2_brevp>;
+def: T_P_pat<S2_ct0p, int_hexagon_S2_ct0p>;
+def: T_P_pat<S2_ct1p, int_hexagon_S2_ct1p>;
+
+def: T_Q_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
+def: T_Q_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
+def: T_Q_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
+
+def : T_Q_PI_pat<A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi>;
+def : T_Q_PI_pat<A4_vcmpbgti, int_hexagon_A4_vcmpbgti>;
+def : T_Q_PI_pat<A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui>;
+def : T_Q_PI_pat<A4_vcmpheqi, int_hexagon_A4_vcmpheqi>;
+def : T_Q_PI_pat<A4_vcmphgti, int_hexagon_A4_vcmphgti>;
+def : T_Q_PI_pat<A4_vcmphgtui, int_hexagon_A4_vcmphgtui>;
+def : T_Q_PI_pat<A4_vcmpweqi, int_hexagon_A4_vcmpweqi>;
+def : T_Q_PI_pat<A4_vcmpwgti, int_hexagon_A4_vcmpwgti>;
+def : T_Q_PI_pat<A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui>;
+def : T_Q_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
+
+def : T_Q_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
+def : T_Q_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
+def : T_Q_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
+def : T_Q_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
+def : T_Q_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
+def : T_Q_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
+
+def : T_Q_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
+def : T_Q_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
+def : T_Q_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
+
+def : T_Q_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
+def : T_Q_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
+def : T_Q_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
+
+def : T_Q_RP_pat<A4_boundscheck, int_hexagon_A4_boundscheck>;
+def : T_Q_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
+
+def : T_RRR_pat <M4_mpyrr_addr, int_hexagon_M4_mpyrr_addr>;
+def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
+def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
+def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
+def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
+def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
+def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
+
+// Complex multiply 32x16
+def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
+def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
+
+def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
+def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
+
+def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
+def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
+
+// Complex add/sub halfwords/words
+def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
+def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
+def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
+def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
+
+def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
+def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
+
+// Extract bitfield
+def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
+def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
+def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
+def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
+
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
+
+// Shift an immediate left by register amount
+def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
+
+// Vector reduce maximum halfwords
+def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
+def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
+
+// Vector reduce maximum words
+def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
+def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
+
+// Vector reduce minimum halfwords
+def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
+def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
+
+// Vector reduce minimum words
+def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
+def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
+
+// Rotate and reduce bytes
+def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
+ u2_0ImmPred:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
+
+// Rotate and reduce bytes with accumulation
+// Rxx+=vrcrotate(Rss,Rt,#u2)
+def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2_0ImmPred:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2_0ImmPred:$src4)>;
+
+// Vector conditional negate
+def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
+
+// Logical xor with xor accumulation
+def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
+
+// ALU64 - Vector min/max byte
+def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
+def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
+
+// Shift and add/sub/and/or
+def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
+def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
+def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
+def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
+def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
+def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
+def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
+def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
+
+// Split bitfield
+def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
+def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
+
+def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
+
+def: T_Q_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
+def: T_Q_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
+
+def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
+def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
+def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
+
+//*******************************************************************
+// ALU32/ALU
+//*******************************************************************
+
+// ALU32 / ALU / Logical Operations.
+def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
+def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
+
+//*******************************************************************
+// ALU32/PERM
+//*******************************************************************
+
+// Combine Words Into Doublewords.
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
+
+//*******************************************************************
+// ALU32/PRED
+//*******************************************************************
+
+// Compare
+def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
+def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
+def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
+
+// Compare To General Register.
+def: T_Q_RR_pat<C4_cmpneq, int_hexagon_C4_cmpneq>;
+def: T_Q_RR_pat<C4_cmplte, int_hexagon_C4_cmplte>;
+def: T_Q_RR_pat<C4_cmplteu, int_hexagon_C4_cmplteu>;
+
+def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
+def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
+
+def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
+def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
+
+//*******************************************************************
+// CR
+//*******************************************************************
+
+// CR / Logical Operations On Predicates.
+def: T_Q_QQQ_pat<C4_and_and, int_hexagon_C4_and_and>;
+def: T_Q_QQQ_pat<C4_and_andn, int_hexagon_C4_and_andn>;
+def: T_Q_QQQ_pat<C4_and_or, int_hexagon_C4_and_or>;
+def: T_Q_QQQ_pat<C4_and_orn, int_hexagon_C4_and_orn>;
+def: T_Q_QQQ_pat<C4_or_and, int_hexagon_C4_or_and>;
+def: T_Q_QQQ_pat<C4_or_andn, int_hexagon_C4_or_andn>;
+def: T_Q_QQQ_pat<C4_or_or, int_hexagon_C4_or_or>;
+def: T_Q_QQQ_pat<C4_or_orn, int_hexagon_C4_or_orn>;
+
+//*******************************************************************
+// XTYPE/ALU
+//*******************************************************************
+
+// Add And Accumulate.
+
+def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
+def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
+
+
+// XTYPE / ALU / Logical-logical Words.
+def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
+def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
+def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
+def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
+def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
+def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
+def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
+def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
+def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
+def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
+def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
+
+def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
+def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
+def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
+
+// Modulo wrap.
+def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
+
+// Arithmetic/Convergent round
+// Rd=[cround|round](Rs,Rt)[:sat]
+// Rd=[cround|round](Rs,#u5)[:sat]
+def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
+def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
+
+def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
+def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
+
+def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
+def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
+
+def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
+
//Rdd[+]=vrmpybsu(Rss,Rtt)
//Rdd[+]=vrmpybuu(Rss,Rtt)
-let Predicates = [HasV5] in {
def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;
def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>;
@@ -31,7 +336,6 @@ def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>;
// Rd=vaddhub(Rss,Rtt):sat
def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>;
-}
def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>;
def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index f9ed03909233..985f41f3a7d9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1970,12 +1970,13 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// Get the location that may be stored across the loop. Since the access
// is strided positively through memory, we say that the modified location
// starts at the pointer and has infinite size.
- LocationSize AccessSize = MemoryLocation::UnknownSize;
+ LocationSize AccessSize = LocationSize::unknown();
// If the loop iterates a fixed number of times, we can refine the access
// size to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
+ AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
+ StoreSize);
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
@@ -2360,7 +2361,7 @@ bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
return DT->dominates(BB, EB);
};
- if (!std::all_of(ExitBlocks.begin(), ExitBlocks.end(), DominatedByBB))
+ if (!all_of(ExitBlocks, DominatedByBB))
return false;
bool MadeChange = false;
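The LocationSize change in the hunk above also carries a refinement: once the backedge-taken count is a known constant, the memset idiom touches exactly (BECount + 1) * StoreSize bytes instead of an unknown amount. A self-contained model of that refinement is sketched below; the type and function names are illustrative stand-ins, not the LLVM API.

#include <cstdint>
#include <optional>

// Stand-in for LLVM's LocationSize: either a precise byte count or unknown.
struct AccessSizeModel {
  std::optional<uint64_t> Bytes; // empty means "unknown"
  static AccessSizeModel unknown() { return {}; }
  static AccessSizeModel precise(uint64_t B) { return {B}; }
};

// With a constant backedge-taken count the store runs for BECount + 1
// iterations of StoreSize bytes each; otherwise the size stays unknown.
static AccessSizeModel refineAccessSize(std::optional<uint64_t> BECount,
                                        uint64_t StoreSize) {
  if (!BECount)
    return AccessSizeModel::unknown();
  return AccessSizeModel::precise((*BECount + 1) * StoreSize);
}

For example, a loop with backedge-taken count 99 storing 4 bytes per iteration yields a precise 400-byte location, which gives alias analysis a much tighter query than an unknown size.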
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 74c550ce8226..908ce24136c7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -105,6 +105,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
default:
if (!ResourcesModel->canReserveResources(*SU->getInstr()))
return false;
+ break;
case TargetOpcode::EXTRACT_SUBREG:
case TargetOpcode::INSERT_SUBREG:
case TargetOpcode::SUBREG_TO_REG:
@@ -215,8 +216,7 @@ void VLIWMachineScheduler::schedule() {
++su) if (SUnits[su].getDepth() > maxD) maxD =
SUnits[su].getDepth();
dbgs() << "Max Depth " << maxD << "\n";);
- LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su]
- .dumpAll(this));
+ LLVM_DEBUG(dump());
initQueues(TopRoots, BotRoots);
@@ -489,7 +489,7 @@ void ConvergingVLIWScheduler::traceCandidate(const char *Label,
else
dbgs() << " ";
dbgs() << "cost(" << Cost << ")\t";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
}
// Very detailed queue dump, to be used with higher verbosity levels.
@@ -982,7 +982,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
<< " Scheduling instruction in cycle "
<< (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
<< reportPackets() << ")\n";
- SU->dump(DAG));
+ DAG->dumpNode(*SU));
return SU;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 29c044b3b729..c3a5bd5d57bf 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -502,7 +502,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
MIB.add(ImmOp);
OpStart = 4;
Changed = true;
- } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) {
+ } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset &&
+ OldMI->getOperand(2).isImm()) {
short NewOpCode = HII->changeAddrMode_io_abs(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode))
@@ -518,17 +519,19 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
- } else if (ImmOpNum == 2 && OldMI->getOperand(3).getImm() == 0) {
- short NewOpCode = HII->changeAddrMode_rr_io(*OldMI);
- assert(NewOpCode >= 0 && "Invalid New opcode\n");
- MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.add(OldMI->getOperand(0));
- MIB.add(OldMI->getOperand(1));
- MIB.add(ImmOp);
- OpStart = 4;
- Changed = true;
- LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
- LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
+ } else if (ImmOpNum == 2) {
+ if (OldMI->getOperand(3).isImm() && OldMI->getOperand(3).getImm() == 0) {
+ short NewOpCode = HII->changeAddrMode_rr_io(*OldMI);
+ assert(NewOpCode >= 0 && "Invalid New opcode\n");
+ MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
+ MIB.add(OldMI->getOperand(0));
+ MIB.add(OldMI->getOperand(1));
+ MIB.add(ImmOp);
+ OpStart = 4;
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
+ LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
+ }
}
if (Changed)
@@ -758,11 +761,13 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
// This could happen, for example, when DefR = R4, but the used
// register is D2.
+ // Change UseMI if replacement is possible. If any replacement failed,
+ // or wasn't attempted, make sure to keep the TFR.
+ bool Xformed = false;
if (UseMOnum >= 0 && InstrEvalResult[UseMI])
- // Change UseMI if replacement is possible.
- Changed |= xformUseMI(MI, UseMI, UseN, UseMOnum);
- else
- KeepTfr = true;
+ Xformed = xformUseMI(MI, UseMI, UseN, UseMOnum);
+ Changed |= Xformed;
+ KeepTfr |= !Xformed;
}
if (!KeepTfr)
Deleted.insert(MI);
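The rewritten bookkeeping above closes a gap: previously the transfer could be deleted even when xformUseMI failed, because KeepTfr was only set when no rewrite was attempted at all. The corrected rule is sketched below with invented names, purely for illustration.

#include <cassert>

// The TFR may only be deleted when every use was actually rewritten; a failed
// or skipped rewrite forces KeepTfr to stay set.
struct TfrBookkeeping {
  bool Changed = false;
  bool KeepTfr = false;

  void visitUse(bool CanRewrite, bool RewriteSucceeded) {
    bool Xformed = CanRewrite && RewriteSucceeded;
    Changed |= Xformed;
    KeepTfr |= !Xformed;
  }
};

int main() {
  TfrBookkeeping B;
  B.visitUse(true, true);         // one use rewritten
  B.visitUse(true, false);        // one rewrite failed
  assert(B.Changed && B.KeepTfr); // something changed, but the TFR must stay
  return 0;
}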
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 384fda4ce39a..89177564057e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -177,6 +177,11 @@ def UDEC32: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32);
}]>;
+class Subi<int From>: SDNodeXForm<imm,
+ "int32_t V = " # From # " - N->getSExtValue();" #
+ "return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);"
+>;
+
def Log2_32: SDNodeXForm<imm, [{
uint32_t V = N->getZExtValue();
return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
@@ -218,6 +223,8 @@ def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>;
def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>;
def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;
+def ToAext64: OutPatFrag<(ops node:$Rs),
+ (REG_SEQUENCE DoubleRegs, (i32 (IMPLICIT_DEF)), isub_hi, (i32 $Rs), isub_lo)>;
def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt),
(REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>;
@@ -246,6 +253,9 @@ def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
def Sext64: PatLeaf<(i64 Usxtw:$Rs)>;
+def azext: PatFrags<(ops node:$Rs), [(zext node:$Rs), (anyext node:$Rs)]>;
+def asext: PatFrags<(ops node:$Rs), [(sext node:$Rs), (anyext node:$Rs)]>;
+
def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
(PS_fi (i32 AddrFI:$Rs), imm:$off)>;
@@ -257,6 +267,23 @@ class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>;
class Not2<PatFrag P>
: PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
+// If a constant operand feeds the and/or instruction, do not generate the
+// compound instruction: it is not always profitable, since we can end up
+// with an extra transfer. For example, rather than
+//   ra = #65820; rb = lsr(rb, #8); rc ^= and (rb, ra)
+// this form is preferable:
+//   ra = and (#65820, lsr(ra, #8)); rb = xor(rb, ra)
+class Su_ni1<PatFrag Op>
+ : PatFrag<Op.Operands, !head(Op.Fragments), [{
+ if (hasOneUse(N)){
+ // Check if Op1 is an immediate operand.
+ SDValue Op1 = N->getOperand(1);
+ return !dyn_cast<ConstantSDNode>(Op1);
+ }
+ return false;}],
+ Op.OperandTransform>;
+
class Su<PatFrag Op>
: PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
Op.OperandTransform>;
@@ -348,38 +375,34 @@ def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>;
// --(2) Type cast -------------------------------------------------------
//
-let Predicates = [HasV5] in {
- def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
- def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
+def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
+def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
- def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
- def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
- def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
- def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
+def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
+def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
+def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
+def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
- def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
- def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
- def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
- def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
+def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
+def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
+def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
+def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
- def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
- def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
- def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
- def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
+def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
+def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
+def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
+def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
- def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
- def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
- def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
- def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
-}
+def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
+def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
+def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
+def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-let Predicates = [HasV5] in {
- def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
- def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
- def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
- def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
-}
+def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
+def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
+def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
+def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> {
def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>;
@@ -403,52 +426,48 @@ def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>;
def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>;
def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>;
-def: Pat<(i64 (sext I1:$Pu)),
- (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
- (C2_muxii PredRegs:$Pu, -1, 0))>;
-
-def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
-def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
-def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
-def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
-def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
-def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
-def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
-
def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>;
def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>;
def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>;
def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>;
-def: Pat<(i1 (trunc I64:$Rs)), (C2_tfrrp (LoReg $Rs))>;
+def: Pat<(i1 (trunc I32:$Rs)), (S2_tstbit_i I32:$Rs, 0)>;
+def: Pat<(i1 (trunc I64:$Rs)), (S2_tstbit_i (LoReg $Rs), 0)>;
let AddedComplexity = 20 in {
def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>;
def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>;
}
-def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+// Extensions from i1 or vectors of i1.
+def: Pat<(i32 (azext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
+def: Pat<(i64 (azext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
+def: Pat<(i64 (sext I1:$Pu)), (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
+ (C2_muxii PredRegs:$Pu, -1, 0))>;
+
+def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
+def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
+def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
+def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
+def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
def Vsplatpi: OutPatFrag<(ops node:$V),
(Combinew (A2_tfrsi $V), (A2_tfrsi $V))>;
-def: Pat<(v8i8 (zext V8I1:$Pu)),
- (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
-def: Pat<(v4i16 (zext V4I1:$Pu)),
- (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
-def: Pat<(v2i32 (zext V2I1:$Pu)),
- (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
-def: Pat<(v4i8 (zext V4I1:$Pu)),
- (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
-def: Pat<(v2i16 (zext V2I1:$Pu)),
+def: Pat<(v2i16 (azext V2I1:$Pu)),
(A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>;
+def: Pat<(v2i32 (azext V2I1:$Pu)),
+ (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
+def: Pat<(v4i8 (azext V4I1:$Pu)),
+ (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
+def: Pat<(v4i16 (azext V4I1:$Pu)),
+ (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
+def: Pat<(v8i8 (azext V8I1:$Pu)),
+ (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
-def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
-def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (azext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (azext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
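The azext-from-predicate patterns above rely on C2_mask producing an all-ones or all-zeros lane per predicate bit, with the AND constants then keeping only bit 0 of each lane. A minimal C++ sketch of that lane arithmetic (my illustration, not part of the patch; maskBytes and zextV8I1ToV8I8 are hypothetical helpers, and the per-lane behaviour of C2_mask is my assumption):

  #include <cstdint>

  // Models C2_mask: each predicate bit becomes an all-ones or all-zeros byte.
  static uint64_t maskBytes(uint8_t pred) {
    uint64_t r = 0;
    for (int i = 0; i < 8; ++i)
      if (pred & (1u << i))
        r |= uint64_t(0xFF) << (8 * i);
    return r;
  }

  // Mirrors (A2_andp (C2_mask $Pu), (Vsplatpi 0x01010101)): keep only bit 0
  // of every byte lane, turning each all-ones lane into a plain 0 or 1.
  static uint64_t zextV8I1ToV8I8(uint8_t pred) {
    return maskBytes(pred) & 0x0101010101010101ULL;
  }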
@@ -582,31 +601,29 @@ def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
-let Predicates = [HasV5] in {
- def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
-
- def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
-}
+def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
+
+def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
// Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds.
@@ -729,32 +746,28 @@ class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>;
class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;
-let Predicates = [HasV5] in {
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
- def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
+def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
- def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
-}
+def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
+def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
-let Predicates = [HasV5] in {
- def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
- def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
- def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
- def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
- def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
- def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
-}
+def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
// --(6) Select ----------------------------------------------------------
@@ -784,32 +797,30 @@ def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
-let Predicates = [HasV5] in {
- def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
- (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
- def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
- (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
- def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
- (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
- def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
- (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
- (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
+def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
+ (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
+def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
+ (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
+def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
+ (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
+def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
+ (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
+ (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
- def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
- (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
- def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
- (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
+def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
+ (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
+def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
+ (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
- def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
- (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
- def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
- (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
-}
+def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
+ (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
+def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
+ (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt),
- (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt),
- (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
@@ -872,7 +883,7 @@ let AddedComplexity = 200 in {
defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
}
-let AddedComplexity = 100, Predicates = [HasV5] in {
+let AddedComplexity = 100 in {
defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
@@ -989,15 +1000,95 @@ def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>;
def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>;
def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>;
-let Predicates = [HasV60] in {
+// Funnel shifts.
+def IsMul8_U3: PatLeaf<(i32 imm), [{
+ uint64_t V = N->getZExtValue();
+ return V % 8 == 0 && isUInt<3>(V / 8);
+}]>;
+
+def Divu8: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i32);
+}]>;
+
+// Funnel shift-left.
+def FShl32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (HiReg (S2_asl_i_p (Combinew $Rs, $Rt), $S))>;
+def FShl32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (HiReg (S2_asl_r_p (Combinew $Rs, $Rt), $Ru))>;
+
+def FShl64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (S2_lsr_i_p_or (S2_asl_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
+def FShl64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (S2_lsr_r_p_or (S2_asl_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
+
+// Combined SDNodeXForm: (Divu8 (Subi<64> $S))
+def Divu64_8: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((64 - N->getSExtValue()) / 8,
+ SDLoc(N), MVT::i32);
+}]>;
+
+// Special cases:
+let AddedComplexity = 100 in {
+ def: Pat<(fshl I32:$Rs, I32:$Rt, (i32 16)),
+ (A2_combine_hl I32:$Rs, I32:$Rt)>;
+ def: Pat<(fshl I64:$Rs, I64:$Rt, IsMul8_U3:$S),
+ (S2_valignib I64:$Rs, I64:$Rt, (Divu64_8 $S))>;
+}
+
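The fragments above implement the generic funnel-shift-left that LLVM's fshl node defines: concatenate the two inputs, shift left, keep the high word; IsMul8_U3 and Divu64_8 then turn whole-byte shift amounts into the byte offset fed to S2_valignib. A small standalone sketch of that arithmetic (mine, not part of the patch; fshl32 and divu64_8 are hypothetical helpers):

  #include <cassert>
  #include <cstdint>

  // Generic funnel-shift-left (LLVM fshl): concatenate a (high) and b (low),
  // shift left by s, keep the high word.
  static uint32_t fshl32(uint32_t a, uint32_t b, unsigned s) {
    uint64_t ab = (uint64_t(a) << 32) | b;    // what Combinew builds
    return uint32_t((ab << (s & 31)) >> 32);  // what HiReg(asl(...)) extracts
  }

  // IsMul8_U3 accepts shift amounts that are whole bytes (0, 8, ..., 56);
  // Divu64_8 turns such a bit count into the byte offset for S2_valignib.
  static unsigned divu64_8(unsigned s) {
    assert(s % 8 == 0 && s / 8 < 8 && "IsMul8_U3 would reject this amount");
    return (64 - s) / 8;                      // e.g. s == 24  ->  5
  }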
+let Predicates = [HasV60], AddedComplexity = 50 in {
def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>;
def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>;
}
+let AddedComplexity = 30 in {
+ def: Pat<(rotl I32:$Rs, u5_0ImmPred:$S), (FShl32i $Rs, $Rs, imm:$S)>;
+ def: Pat<(rotl I64:$Rs, u6_0ImmPred:$S), (FShl64i $Rs, $Rs, imm:$S)>;
+ def: Pat<(fshl I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShl32i $Rs, $Rt, imm:$S)>;
+ def: Pat<(fshl I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShl64i $Rs, $Rt, imm:$S)>;
+}
+def: Pat<(rotl I32:$Rs, I32:$Rt), (FShl32r $Rs, $Rs, $Rt)>;
+def: Pat<(rotl I64:$Rs, I32:$Rt), (FShl64r $Rs, $Rs, $Rt)>;
+def: Pat<(fshl I32:$Rs, I32:$Rt, I32:$Ru), (FShl32r $Rs, $Rt, $Ru)>;
+def: Pat<(fshl I64:$Rs, I64:$Rt, I32:$Ru), (FShl64r $Rs, $Rt, $Ru)>;
+
+// Funnel shift-right.
+def FShr32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (LoReg (S2_lsr_i_p (Combinew $Rs, $Rt), $S))>;
+def FShr32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (LoReg (S2_lsr_r_p (Combinew $Rs, $Rt), $Ru))>;
+
+def FShr64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (S2_asl_i_p_or (S2_lsr_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
+def FShr64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (S2_asl_r_p_or (S2_lsr_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
+
+// Special cases:
+let AddedComplexity = 100 in {
+ def: Pat<(fshr I32:$Rs, I32:$Rt, (i32 16)),
+ (A2_combine_hl I32:$Rs, I32:$Rt)>;
+ def: Pat<(fshr I64:$Rs, I64:$Rt, IsMul8_U3:$S),
+ (S2_valignib I64:$Rs, I64:$Rt, (Divu8 $S))>;
+}
+
+let Predicates = [HasV60], AddedComplexity = 50 in {
+ def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (S6_rol_i_r I32:$Rs, (Subi<32> $S))>;
+ def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (S6_rol_i_p I64:$Rs, (Subi<64> $S))>;
+}
+let AddedComplexity = 30 in {
+ def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (FShr32i $Rs, $Rs, imm:$S)>;
+ def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (FShr64i $Rs, $Rs, imm:$S)>;
+ def: Pat<(fshr I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShr32i $Rs, $Rt, imm:$S)>;
+ def: Pat<(fshr I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShr64i $Rs, $Rt, imm:$S)>;
+}
+def: Pat<(rotr I32:$Rs, I32:$Rt), (FShr32r $Rs, $Rs, $Rt)>;
+def: Pat<(rotr I64:$Rs, I32:$Rt), (FShr64r $Rs, $Rs, $Rt)>;
+def: Pat<(fshr I32:$Rs, I32:$Rt, I32:$Ru), (FShr32r $Rs, $Rt, $Ru)>;
+def: Pat<(fshr I64:$Rs, I64:$Rt, I32:$Ru), (FShr64r $Rs, $Rt, $Ru)>;
+
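Likewise for funnel-shift-right: the low word of the shifted concatenation is kept, and the HasV60 rotr patterns reuse the rotate-left instructions via the identity rotr(x, s) == rotl(x, width - s). A sketch of both (mine, not part of the patch; fshr32 and rotr32ViaRotl are hypothetical helpers):

  #include <cstdint>

  // Generic funnel-shift-right (LLVM fshr): concatenate a (high) and b (low),
  // shift right by s, keep the low word.
  static uint32_t fshr32(uint32_t a, uint32_t b, unsigned s) {
    uint64_t ab = (uint64_t(a) << 32) | b;    // Combinew
    return uint32_t(ab >> (s & 31));          // LoReg(lsr(...))
  }

  // The HasV60 rotr patterns reuse the rotate-left instructions:
  // rotr(x, s) == rotl(x, width - s).
  static uint32_t rotr32ViaRotl(uint32_t x, unsigned s) {
    s &= 31;
    return s ? (x << (32 - s)) | (x >> s) : x;
  }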
def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
(S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
- (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5]>;
+ (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>;
// Prefer S2_addasl_rrri over S2_asl_i_r_acc.
let AddedComplexity = 120 in
@@ -1119,14 +1210,6 @@ def: Pat<(or (or (or (shl (Zext64 (and I32:$b, (i32 65535))), (i32 16)),
(Combinew (A2_combine_ll I32:$d, I32:$c),
(A2_combine_ll I32:$b, I32:$a))>;
-def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))),
- (i32 8)),
- (i32 (zextloadi8 (add I32:$b, 2)))),
- (i32 16)),
- (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
- (zextloadi8 I32:$b)),
- (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
-
let AddedComplexity = 200 in {
def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))),
(A2_combine_ll I32:$Rt, I32:$Rs)>;
@@ -1172,6 +1255,19 @@ def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
(S2_asl_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(HexagonVASR V2I16:$Rs, u4_0ImmPred:$S),
+ (LoReg (S2_asr_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVASL V2I16:$Rs, u4_0ImmPred:$S),
+ (LoReg (S2_asl_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVLSR V2I16:$Rs, u4_0ImmPred:$S),
+ (LoReg (S2_lsr_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVASR V2I16:$Rs, I32:$Rt),
+ (LoReg (S2_asr_i_vh (ToAext64 $Rs), I32:$Rt))>;
+def: Pat<(HexagonVASL V2I16:$Rs, I32:$Rt),
+ (LoReg (S2_asl_i_vh (ToAext64 $Rs), I32:$Rt))>;
+def: Pat<(HexagonVLSR V2I16:$Rs, I32:$Rt),
+ (LoReg (S2_lsr_i_vh (ToAext64 $Rs), I32:$Rt))>;
+
// --(9) Arithmetic/bitwise ----------------------------------------------
//
@@ -1182,17 +1278,15 @@ def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>;
-let Predicates = [HasV5] in {
- def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
- def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
+def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
+def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
- def: Pat<(fabs F64:$Rs),
- (Combinew (S2_clrbit_i (HiReg $Rs), 31),
- (i32 (LoReg $Rs)))>;
- def: Pat<(fneg F64:$Rs),
- (Combinew (S2_togglebit_i (HiReg $Rs), 31),
- (i32 (LoReg $Rs)))>;
-}
+def: Pat<(fabs F64:$Rs),
+ (Combinew (S2_clrbit_i (HiReg $Rs), 31),
+ (i32 (LoReg $Rs)))>;
+def: Pat<(fneg F64:$Rs),
+ (Combinew (S2_togglebit_i (HiReg $Rs), 31),
+ (i32 (LoReg $Rs)))>;
def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
@@ -1258,12 +1352,15 @@ def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>;
def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>;
def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>;
-let Predicates = [HasV5] in {
- def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
- def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
- def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
- def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
- def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
+def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
+def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
+def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
+def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
+def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
+
+let Predicates = [HasV66] in {
+ def: OpR_RR_pat<F2_dfadd, pf2<fadd>, f64, F64>;
+ def: OpR_RR_pat<F2_dfsub, pf2<fsub>, f64, F64>;
}
// In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add,
@@ -1272,6 +1369,8 @@ let AddedComplexity = 10 in {
def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>;
def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>;
def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>;
+ let Predicates = [HasV66] in
+ def: AccRRR_pat<M2_mnaci, Sub, Su<Mul>, I32, I32, I32>;
}
def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>;
@@ -1344,16 +1443,16 @@ def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8),
def: Pat<(add Sext64:$Rs, I64:$Rt),
(A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
-def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32, I32>;
-def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32, I32>;
-def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
-def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32, I32>;
-def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32, I32>;
-def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
-def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32, I32>;
-def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32, I32>;
-def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
-def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
+def: AccRRR_pat<M4_and_and, And, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_or, And, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_and, Or, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_or, Or, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_and, Xor, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_or, Xor, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
// For dags like (or (and (not _), _), (shl _, _)) where the "or" with
// one argument matches the patterns below, and with the other argument
@@ -1497,14 +1596,12 @@ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
(M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
-let Predicates = [HasV5] in {
- def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
- (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
- def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
- (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
- def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
- (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-}
+def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
+ (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
+def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
+ (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
+def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
+ (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
@@ -1515,9 +1612,9 @@ def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
// Add/subtract two v4i8: Hexagon does not have an insn for this one, so
// we use the double add v8i8, and use only the low part of the result.
def: Pat<(add V4I8:$Rs, V4I8:$Rt),
- (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (A2_vaddub (ToAext64 $Rs), (ToAext64 $Rt)))>;
def: Pat<(sub V4I8:$Rs, V4I8:$Rt),
- (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (A2_vsubub (ToAext64 $Rs), (ToAext64 $Rt)))>;
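A sketch of the byte-lane arithmetic the comment above describes (my illustration, assuming A2_vaddub/A2_vsubub operate lane-wise without saturation; addV4I8 is a hypothetical helper):

  #include <cstdint>

  // Widen both v4i8 operands to eight byte lanes, add lane-wise (the 64-bit
  // byte add wraps mod 256), then keep only the low 32 bits; the upper four
  // lanes come from the any-extension and are simply dropped.
  static uint32_t addV4I8(uint32_t a, uint32_t b) {
    uint32_t r = 0;
    for (int i = 0; i < 4; ++i) {
      uint8_t la = (a >> (8 * i)) & 0xFF;
      uint8_t lb = (b >> (8 * i)) & 0xFF;
      r |= uint32_t(uint8_t(la + lb)) << (8 * i);  // per-lane, wraps mod 256
    }
    return r;
  }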
// Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two
// half-words, and saturates the result to a 32-bit value, except the
@@ -1531,14 +1628,12 @@ def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
// Multiplies two v4i8 vectors.
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
- (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>,
- Requires<[HasV5]>;
+ (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>;
// Multiplies two v8i8 vectors.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
(Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
- (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>,
- Requires<[HasV5]>;
+ (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>;
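A sketch of the lane math assumed by the v4i8 and v8i8 multiply patterns above (mine; it presumes M5_vmpybuu yields a 16-bit product per byte lane and S2_vtrunehb keeps the low byte of each halfword; mulV4I8 is a hypothetical helper):

  #include <cstdint>

  // Per lane: widen-multiply two unsigned bytes to a 16-bit product, then
  // keep only the low byte.  The low 8 bits of a product do not depend on
  // signedness, so this is exactly an i8 multiply per lane.
  static uint32_t mulV4I8(uint32_t a, uint32_t b) {
    uint32_t r = 0;
    for (int i = 0; i < 4; ++i) {
      uint32_t p = ((a >> (8 * i)) & 0xFF) * ((b >> (8 * i)) & 0xFF);
      r |= (p & 0xFF) << (8 * i);   // vtrunehb-style truncation to a byte
    }
    return r;
  }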
// --(10) Bit ------------------------------------------------------------
@@ -1868,10 +1963,10 @@ let AddedComplexity = 20 in {
}
let AddedComplexity = 30 in {
- defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
- defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
- defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
- defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
+ defm: Loadxim_pat<extloadi1, i64, ToAext64, anyimm0, L2_loadrub_io>;
+ defm: Loadxim_pat<extloadi8, i64, ToAext64, anyimm0, L2_loadrub_io>;
+ defm: Loadxim_pat<extloadi16, i64, ToAext64, anyimm1, L2_loadruh_io>;
+ defm: Loadxim_pat<extloadi32, i64, ToAext64, anyimm2, L2_loadri_io>;
defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
@@ -1906,13 +2001,13 @@ let AddedComplexity = 60 in {
def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>;
def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
- def: Loadxum_pat<extloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
+ def: Loadxum_pat<extloadi8, i64, anyimm0, ToAext64, L4_loadrub_ur>;
def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>;
def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
- def: Loadxum_pat<extloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
+ def: Loadxum_pat<extloadi16, i64, anyimm1, ToAext64, L4_loadruh_ur>;
def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>;
def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
- def: Loadxum_pat<extloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
+ def: Loadxum_pat<extloadi32, i64, anyimm2, ToAext64, L4_loadri_ur>;
}
let AddedComplexity = 40 in {
@@ -1952,25 +2047,25 @@ let AddedComplexity = 20 in {
}
let AddedComplexity = 40 in {
- def: Loadxrm_shl_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
+ def: Loadxrm_shl_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
- def: Loadxrm_shl_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
+ def: Loadxrm_shl_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
- def: Loadxrm_shl_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
+ def: Loadxrm_shl_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}
let AddedComplexity = 20 in {
- def: Loadxrm_add_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
+ def: Loadxrm_add_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
- def: Loadxrm_add_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
+ def: Loadxrm_add_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
- def: Loadxrm_add_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
+ def: Loadxrm_add_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}
@@ -2002,13 +2097,13 @@ let AddedComplexity = 60 in {
}
let AddedComplexity = 30 in {
- def: Loadam_pat<extloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
+ def: Loadam_pat<extloadi8, i64, anyimm0, ToAext64, PS_loadrubabs>;
def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>;
def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
- def: Loadam_pat<extloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>;
+ def: Loadam_pat<extloadi16, i64, anyimm1, ToAext64, PS_loadruhabs>;
def: Loadam_pat<sextloadi16, i64, anyimm1, ToSext64, PS_loadrhabs>;
def: Loadam_pat<zextloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>;
- def: Loadam_pat<extloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;
+ def: Loadam_pat<extloadi32, i64, anyimm2, ToAext64, PS_loadriabs>;
def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>;
def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;
@@ -2044,13 +2139,13 @@ let AddedComplexity = 100 in {
}
let AddedComplexity = 70 in {
- def: Loadam_pat<extloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
+ def: Loadam_pat<extloadi8, i64, addrgp, ToAext64, L2_loadrubgp>;
def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>;
def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
- def: Loadam_pat<extloadi16, i64, addrgp, ToZext64, L2_loadruhgp>;
+ def: Loadam_pat<extloadi16, i64, addrgp, ToAext64, L2_loadruhgp>;
def: Loadam_pat<sextloadi16, i64, addrgp, ToSext64, L2_loadrhgp>;
def: Loadam_pat<zextloadi16, i64, addrgp, ToZext64, L2_loadruhgp>;
- def: Loadam_pat<extloadi32, i64, addrgp, ToZext64, L2_loadrigp>;
+ def: Loadam_pat<extloadi32, i64, addrgp, ToAext64, L2_loadrigp>;
def: Loadam_pat<sextloadi32, i64, addrgp, ToSext64, L2_loadrigp>;
def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>;
@@ -2306,16 +2401,26 @@ let AddedComplexity = 140 in {
// GP-relative address
let AddedComplexity = 120 in {
- def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
- def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
- def: Storea_pat<store, I32, addrgp, S2_storerigp>;
- def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
- def: Storea_pat<store, F32, addrgp, S2_storerigp>;
- def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
- def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
- def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
- def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
- def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<store, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, V4I8, addrgp, S2_storerigp>;
+ def: Storea_pat<store, V2I16, addrgp, S2_storerigp>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, V8I8, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, V4I16, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, V2I32, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, F32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V4I8, addrgp, S2_storerigp>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V2I16, addrgp, S2_storerigp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V8I8, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V4I16, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V2I32, addrgp, S2_storerdgp>;
def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>;
def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>;
@@ -2325,16 +2430,26 @@ let AddedComplexity = 120 in {
// Absolute address
let AddedComplexity = 110 in {
- def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
- def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
- def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
- def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
- def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
- def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
- def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
- def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
- def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
- def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
+ def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, V4I8, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, V2I16, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, V8I8, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, V4I16, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, V2I32, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
+ def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
+ def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, PS_storeriabs>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, PS_storeriabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, PS_storerdabs>;
def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>;
def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>;
@@ -2344,12 +2459,17 @@ let AddedComplexity = 110 in {
// Reg<<S + Imm
let AddedComplexity = 100 in {
- def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
- def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
- def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
- def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
- def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
- def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
+ def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
+ def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, V4I8, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, V2I16, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, V8I8, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, V4I16, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, V2I32, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)),
(S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>;
@@ -2357,12 +2477,17 @@ let AddedComplexity = 100 in {
// Reg<<S + Reg
let AddedComplexity = 90 in {
- def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>;
- def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
- def: Storexr_shl_pat<store, I32, S4_storeri_rr>;
- def: Storexr_shl_pat<store, I64, S4_storerd_rr>;
- def: Storexr_shl_pat<store, F32, S4_storeri_rr>;
- def: Storexr_shl_pat<store, F64, S4_storerd_rr>;
+ def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexr_shl_pat<store, I32, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, V4I8, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, V2I16, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, I64, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, V8I8, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, V4I16, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, V2I32, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, F32, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, F64, S4_storerd_rr>;
def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)),
(S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>;
@@ -2414,20 +2539,30 @@ let AddedComplexity = 70 in {
// Fi+Imm, Fi, store-register
let AddedComplexity = 60 in {
- defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>;
- defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
- defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>;
- defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>;
- defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>;
- defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>;
+ defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
+ defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, V4I8, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, V2I16, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, V8I8, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, V4I16, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, V2I32, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>;
defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>;
- def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>;
- def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>;
- def: Storexi_fi_pat<store, I32, S2_storeri_io>;
- def: Storexi_fi_pat<store, I64, S2_storerd_io>;
- def: Storexi_fi_pat<store, F32, S2_storeri_io>;
- def: Storexi_fi_pat<store, F64, S2_storerd_io>;
+ def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>;
+ def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>;
+ def: Storexi_fi_pat<store, I32, S2_storeri_io>;
+ def: Storexi_fi_pat<store, V4I8, S2_storeri_io>;
+ def: Storexi_fi_pat<store, V2I16, S2_storeri_io>;
+ def: Storexi_fi_pat<store, I64, S2_storerd_io>;
+ def: Storexi_fi_pat<store, V8I8, S2_storerd_io>;
+ def: Storexi_fi_pat<store, V4I16, S2_storerd_io>;
+ def: Storexi_fi_pat<store, V2I32, S2_storerd_io>;
+ def: Storexi_fi_pat<store, F32, S2_storeri_io>;
+ def: Storexi_fi_pat<store, F64, S2_storerd_io>;
def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>;
}
@@ -2452,32 +2587,47 @@ let AddedComplexity = 50 in {
// Reg+Imm, store-register
let AddedComplexity = 40 in {
- defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>;
- defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>;
- defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>;
- defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>;
+ defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>;
+ defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, V4I8, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, V2I16, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, V8I8, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, V4I16, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, V2I32, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>;
defm: Storexim_pat<truncstorei8, I64, anyimm0, LoReg, S2_storerb_io>;
defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg, S2_storerh_io>;
defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>;
defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>;
- defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
- defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
- defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, S2_storerd_io>;
}
// Reg+Reg
let AddedComplexity = 30 in {
- def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>;
- def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
- def: Storexr_add_pat<store, I32, S4_storeri_rr>;
- def: Storexr_add_pat<store, I64, S4_storerd_rr>;
- def: Storexr_add_pat<store, F32, S4_storeri_rr>;
- def: Storexr_add_pat<store, F64, S4_storerd_rr>;
+ def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexr_add_pat<store, I32, S4_storeri_rr>;
+ def: Storexr_add_pat<store, V4I8, S4_storeri_rr>;
+ def: Storexr_add_pat<store, V2I16, S4_storeri_rr>;
+ def: Storexr_add_pat<store, I64, S4_storerd_rr>;
+ def: Storexr_add_pat<store, V8I8, S4_storerd_rr>;
+ def: Storexr_add_pat<store, V4I16, S4_storerd_rr>;
+ def: Storexr_add_pat<store, V2I32, S4_storerd_rr>;
+ def: Storexr_add_pat<store, F32, S4_storeri_rr>;
+ def: Storexr_add_pat<store, F64, S4_storerd_rr>;
def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)),
(S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>;
@@ -2496,22 +2646,32 @@ let AddedComplexity = 20 in {
// Reg, store-register
let AddedComplexity = 10 in {
- def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>;
- def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>;
- def: Storexi_base_pat<store, I32, S2_storeri_io>;
- def: Storexi_base_pat<store, I64, S2_storerd_io>;
- def: Storexi_base_pat<store, F32, S2_storeri_io>;
- def: Storexi_base_pat<store, F64, S2_storerd_io>;
+ def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>;
+ def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>;
+ def: Storexi_base_pat<store, I32, S2_storeri_io>;
+ def: Storexi_base_pat<store, V4I8, S2_storeri_io>;
+ def: Storexi_base_pat<store, V2I16, S2_storeri_io>;
+ def: Storexi_base_pat<store, I64, S2_storerd_io>;
+ def: Storexi_base_pat<store, V8I8, S2_storerd_io>;
+ def: Storexi_base_pat<store, V4I16, S2_storerd_io>;
+ def: Storexi_base_pat<store, V2I32, S2_storerd_io>;
+ def: Storexi_base_pat<store, F32, S2_storeri_io>;
+ def: Storexi_base_pat<store, F64, S2_storerd_io>;
def: Storexim_base_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
def: Storexim_base_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_32>, V4I8, S2_storeri_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_32>, V2I16, S2_storeri_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, V8I8, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, V4I16, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, V2I32, S2_storerd_io>;
}
@@ -2922,6 +3082,8 @@ def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)),
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
def: Pat<(HexagonBARRIER), (Y2_barrier)>;
+def: Pat<(trap), (J2_trap0 (i32 0))>;
+
// Read cycle counter.
def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td b/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td
index fd7466349ecd..b9748c7e189c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -208,6 +208,7 @@ class Call_nr<bits<5> nbits, bit isPred, bit isFalse, dag iops,
let isPredicable = 0; // !if(isPred, 0, 1);
let isPredicated = 0; // isPred;
let isPredicatedFalse = isFalse;
+ let Itinerary = itin;
}
def PS_call_nr : Call_nr<24, 0, 0, (ins s32_0Imm:$Ii), J2_call.Itinerary>;
@@ -525,11 +526,11 @@ let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS],
addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in {
def NAME#_pci : LDInst<(outs RC:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_4403ca65>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_e93a3d71>;
def NAME#_pcr : LDInst<(outs RC:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_2fc0c436>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_44d3da28>;
}
}
@@ -546,11 +547,11 @@ let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS],
addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in {
def NAME#_pci : STInst<(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_9fdb5406>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_e86aa961>;
def NAME#_pcr : STInst<(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_f86c328a>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_da97ee82>;
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 2e11f875c0f9..9b8f4e07376f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -118,18 +118,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool HasEHReturn = MF->getInfo<HexagonMachineFunctionInfo>()->hasEHReturn();
- switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
- case Hexagon::ArchEnum::V4:
- case Hexagon::ArchEnum::V5:
- case Hexagon::ArchEnum::V55:
- case Hexagon::ArchEnum::V60:
- case Hexagon::ArchEnum::V62:
- case Hexagon::ArchEnum::V65:
- return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
- }
-
- llvm_unreachable("Callee saved registers requested for unknown architecture "
- "version");
+ return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
}
@@ -323,6 +312,7 @@ unsigned HexagonRegisterInfo::getHexagonSubRegIndex(
static const unsigned ISub[] = { Hexagon::isub_lo, Hexagon::isub_hi };
static const unsigned VSub[] = { Hexagon::vsub_lo, Hexagon::vsub_hi };
+ static const unsigned WSub[] = { Hexagon::wsub_lo, Hexagon::wsub_hi };
switch (RC.getID()) {
case Hexagon::CtrRegs64RegClassID:
@@ -330,6 +320,8 @@ unsigned HexagonRegisterInfo::getHexagonSubRegIndex(
return ISub[GenIdx];
case Hexagon::HvxWRRegClassID:
return VSub[GenIdx];
+ case Hexagon::HvxVQRRegClassID:
+ return WSub[GenIdx];
}
if (const TargetRegisterClass *SuperRC = *RC.getSuperClasses())
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index 497dc45236b1..3e7b63a462f0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -39,8 +39,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum, RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index 1fe1ef4ac572..da90911e2c05 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -82,13 +82,14 @@ let Namespace = "Hexagon" in {
def isub_hi : SubRegIndex<32, 32>;
def vsub_lo : SubRegIndex<512>;
def vsub_hi : SubRegIndex<512, 512>;
+ def wsub_lo : SubRegIndex<1024>;
+ def wsub_hi : SubRegIndex<1024, 1024>;
def subreg_overflow : SubRegIndex<1, 0>;
// Integer registers.
foreach i = 0-28 in {
def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>;
}
-
def R29 : Ri<29, "r29", ["sp"]>, DwarfRegNum<[29]>;
def R30 : Ri<30, "r30", ["fp"]>, DwarfRegNum<[30]>;
def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>;
@@ -206,6 +207,18 @@ let Namespace = "Hexagon" in {
def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>;
}
+ // Aliases of the V* registers used to hold quad vec values.
+ let SubRegIndices = [wsub_lo, wsub_hi], CoveredBySubRegs = 1 in {
+ def VQ0 : Rd< 0, "v3:0", [W0, W1]>, DwarfRegNum<[252]>;
+ def VQ1 : Rd< 4, "v7:4", [W2, W3]>, DwarfRegNum<[253]>;
+ def VQ2 : Rd< 8, "v11:8", [W4, W5]>, DwarfRegNum<[254]>;
+ def VQ3 : Rd<12, "v15:12", [W6, W7]>, DwarfRegNum<[255]>;
+ def VQ4 : Rd<16, "v19:16", [W8, W9]>, DwarfRegNum<[256]>;
+ def VQ5 : Rd<20, "v23:20", [W10, W11]>, DwarfRegNum<[257]>;
+ def VQ6 : Rd<24, "v27:24", [W12, W13]>, DwarfRegNum<[258]>;
+ def VQ7 : Rd<28, "v31:28", [W14, W15]>, DwarfRegNum<[259]>;
+ }
+
// Vector Predicate registers.
def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>;
def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>;
@@ -295,29 +308,6 @@ def VecQ32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
// HVX register classes
-// Register classes.
-//
-// FIXME: the register order should be defined in terms of the preferred
-// allocation order...
-//
-def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
- (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28),
- R10, R11, R29, R30, R31)>;
-
-// Registers are listed in reverse order for allocation preference reasons.
-def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
- (add R23, R22, R21, R20, R19, R18, R17, R16,
- R7, R6, R5, R4, R3, R2, R1, R0)>;
-
-def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
- (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
-
-def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
- (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>;
-
-def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
- (add D11, D10, D9, D8, D3, D2, D1, D0)>;
-
def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32], 512,
(add (sequence "V%u", 0, 31), VTMP)> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
@@ -336,6 +326,32 @@ def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 512,
[RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>;
}
+def HvxVQR : RegisterClass<"Hexagon", [untyped], 2048,
+ (add (sequence "VQ%u", 0, 7))> {
+ let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [RegInfo<2048,2048,2048>, RegInfo<4096,4096,4096>, RegInfo<2048,2048,2048>]>;
+}
+
+// Core register classes
+
+def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
+ (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28),
+ R10, R11, R29, R30, R31)>;
+
+// Registers are listed in reverse order for allocation preference reasons.
+def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add R23, R22, R21, R20, R19, R18, R17, R16,
+ R7, R6, R5, R4, R3, R2, R1, R0)>;
+
+def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
+ (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
+
+def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
+ (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>;
+
+def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
+ (add D11, D10, D9, D8, D3, D2, D1, D0)>;
+
let Size = 32 in
def PredRegs : RegisterClass<"Hexagon",
[i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, (add P0, P1, P2, P3)>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
index a1dfb66017a5..1024198e9b3f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -27,6 +27,7 @@ def CVI_SHIFT : FuncUnit;
def CVI_MPY0 : FuncUnit;
def CVI_MPY1 : FuncUnit;
def CVI_LD : FuncUnit;
+def CVI_ZW : FuncUnit; // Z register write port
// Combined functional units.
def CVI_XLSHF : FuncUnit;
@@ -57,10 +58,10 @@ include "HexagonDepIICScalar.td"
include "HexagonDepIICHVX.td"
//===----------------------------------------------------------------------===//
-// V4 Machine Info +
+// V5 Machine Info +
//===----------------------------------------------------------------------===//
-include "HexagonScheduleV4.td"
+include "HexagonScheduleV5.td"
// V55 Machine Info +
include "HexagonScheduleV55.td"
@@ -84,3 +85,9 @@ include "HexagonScheduleV62.td"
//===----------------------------------------------------------------------===//
include "HexagonScheduleV65.td"
+
+//===----------------------------------------------------------------------===//
+// V66 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV66.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV5.td
index 69b704a805b8..9a893f6dde02 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV5.td
@@ -1,4 +1,4 @@
-//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//=-HexagonScheduleV5.td - HexagonV5 Scheduling Definitions --*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -10,8 +10,8 @@
def LD_tc_ld_SLOT01 : InstrItinClass;
def ST_tc_st_SLOT01 : InstrItinClass;
-class HexagonV4PseudoItin {
- list<InstrItinData> V4PseudoItin_list = [
+class HexagonV5PseudoItin {
+ list<InstrItinData> V5PseudoItin_list = [
InstrItinData<PSEUDO, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -20,27 +20,27 @@ class HexagonV4PseudoItin {
];
}
-def HexagonV4ItinList : DepScalarItinV4, HexagonV4PseudoItin {
- list<InstrItinData> V4Itin_list = [
+def HexagonV5ItinList : DepScalarItinV5, HexagonV5PseudoItin {
+ list<InstrItinData> V5Itin_list = [
InstrItinData<LD_tc_ld_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData<ST_tc_st_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>
];
list<InstrItinData> ItinList =
- !listconcat(V4Itin_list, DepScalarItinV4_list, V4PseudoItin_list);
+ !listconcat(V5Itin_list, DepScalarItinV5_list, V5PseudoItin_list);
}
-def HexagonItinerariesV4 :
+def HexagonItinerariesV5 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP],
- [Hex_FWD], HexagonV4ItinList.ItinList>;
+ [Hex_FWD], HexagonV5ItinList.ItinList>;
-def HexagonModelV4 : SchedMachineModel {
+def HexagonModelV5 : SchedMachineModel {
// Max issue per cycle == bundle width.
let IssueWidth = 4;
- let Itineraries = HexagonItinerariesV4;
+ let Itineraries = HexagonItinerariesV5;
let LoadLatency = 1;
let CompleteModel = 0;
}
//===----------------------------------------------------------------------===//
-// Hexagon V4 Resource Definitions -
+// Hexagon V5 Resource Definitions -
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
index a2544c92a72c..861a8d2b0339 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -65,7 +65,7 @@ def HexagonItinerariesV60 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
- CVI_ALL_NOMEM],
+ CVI_ALL_NOMEM, CVI_ZW],
[Hex_FWD, HVX_FWD], HexagonV60ItinList.ItinList>;
def HexagonModelV60 : SchedMachineModel {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td
index a0a8595f185f..1c274191277c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td
@@ -21,7 +21,7 @@ def HexagonItinerariesV62 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
- CVI_ALL_NOMEM],
+ CVI_ALL_NOMEM, CVI_ZW],
[Hex_FWD, HVX_FWD], HexagonV62ItinList.ItinList>;
def HexagonModelV62 : SchedMachineModel {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td
index e3b1313923f5..46a79d521795 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td
@@ -23,7 +23,7 @@ def HexagonItinerariesV65 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
- CVI_ALL_NOMEM],
+ CVI_ALL_NOMEM, CVI_ZW],
[Hex_FWD, HVX_FWD],
HexagonV65ItinList.ItinList>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td
new file mode 100644
index 000000000000..38e3d21d3701
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td
@@ -0,0 +1,41 @@
+//=-HexagonScheduleV66.td - HexagonV66 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// ScalarItin and HVXItin contain some old itineraries
+// still used by a handful of instructions. Hopefully, we will be able
+// to get rid of them soon.
+
+def HexagonV66ItinList : DepScalarItinV66, ScalarItin,
+ DepHVXItinV66, HVXItin, PseudoItin {
+ list<InstrItinData> ItinList =
+ !listconcat(DepScalarItinV66_list, ScalarItin_list,
+ DepHVXItinV66_list, HVXItin_list, PseudoItin_list);
+}
+
+def HexagonItinerariesV66 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
+ CVI_ALL_NOMEM, CVI_ZW],
+ [Hex_FWD, HVX_FWD],
+ HexagonV66ItinList.ItinList>;
+
+def HexagonModelV66 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV66;
+ let LoadLatency = 1;
+ let CompleteModel = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V66 Resource Definitions -
+//===----------------------------------------------------------------------===//
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index c41f0d3c085c..55de25120943 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -63,7 +63,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
auto &HST = Fn.getSubtarget<HexagonSubtarget>();
auto &HTM = static_cast<const HexagonTargetMachine&>(Fn.getTarget());
auto &TLOF = *HTM.getObjFileLowering();
- if (HST.useSmallData() && TLOF.isSmallDataEnabled())
+ if (HST.useSmallData() && TLOF.isSmallDataEnabled(HTM))
return false;
const TargetInstrInfo *TII = HST.getInstrInfo();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
index 991af047387e..61c2121163b8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -578,7 +578,7 @@ bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
};
for (auto &G : SGs) {
assert(G.size() > 1 && "Store group with fewer than 2 elements");
- llvm::sort(G.begin(), G.end(), Less);
+ llvm::sort(G, Less);
Changed |= processStoreGroup(G);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 0686d6eb6118..9c77135c2f2f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -93,12 +93,12 @@ HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
static std::map<StringRef, Hexagon::ArchEnum> CpuTable{
{"generic", Hexagon::ArchEnum::V60},
- {"hexagonv4", Hexagon::ArchEnum::V4},
{"hexagonv5", Hexagon::ArchEnum::V5},
{"hexagonv55", Hexagon::ArchEnum::V55},
{"hexagonv60", Hexagon::ArchEnum::V60},
{"hexagonv62", Hexagon::ArchEnum::V62},
{"hexagonv65", Hexagon::ArchEnum::V65},
+ {"hexagonv66", Hexagon::ArchEnum::V66},
};
auto FoundIt = CpuTable.find(CPUString);
@@ -276,11 +276,11 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
if (!L0.mayLoad() || L0.mayStore() ||
HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
continue;
- int Offset0;
+ int64_t Offset0;
unsigned Size0;
- unsigned Base0 = HII.getBaseAndOffset(L0, Offset0, Size0);
+ MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
// If the access size is longer than the L1 cache line, skip the check.
- if (Base0 == 0 || Size0 >= 32)
+ if (BaseOp0 == nullptr || !BaseOp0->isReg() || Size0 >= 32)
continue;
// Scan only up to 32 instructions ahead (to avoid n^2 complexity).
for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
@@ -289,10 +289,11 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
if (!L1.mayLoad() || L1.mayStore() ||
HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
continue;
- int Offset1;
+ int64_t Offset1;
unsigned Size1;
- unsigned Base1 = HII.getBaseAndOffset(L1, Offset1, Size1);
- if (Base1 == 0 || Size1 >= 32 || Base0 != Base1)
+ MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
+ if (BaseOp1 == nullptr || !BaseOp1->isReg() || Size1 >= 32 ||
+ BaseOp0->getReg() != BaseOp1->getReg())
continue;
// Check bits 3 and 4 of the offset: if they differ, a bank conflict
// is unlikely.
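
The mutation above only compares offsets once the two base registers match; per the comment, bits 3 and 4 of the two immediate offsets are then compared, and differing bits make a bank conflict unlikely. A standalone sketch of that predicate, assuming "bits 3 and 4" means offset bits 3-4 (the cost adjustment applied by the real mutation is omitted):

```cpp
// Sketch of the offset check hinted at above: two loads with the same base
// register are treated as potential bank conflicts only when bits 3 and 4
// of their immediate offsets agree. Function name is illustrative.
#include <cstdint>
#include <iostream>

static bool likelyBankConflict(int64_t Offset0, int64_t Offset1) {
  // Extract bits 3 and 4 of each offset; equal values are flagged as a
  // likely conflict, differing values are not.
  return ((Offset0 >> 3) & 0x3) == ((Offset1 >> 3) & 0x3);
}

int main() {
  std::cout << likelyBankConflict(0, 32) << '\n';  // 1: bits 3..4 match
  std::cout << likelyBankConflict(0, 8) << '\n';   // 0: bit 3 differs
}
```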
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index dc8d173a5057..3a5acb53682c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -52,14 +52,16 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool UseNewValueJumps = false;
bool UseNewValueStores = false;
bool UseSmallData = false;
+ bool UseZRegOps = false;
bool HasMemNoShuf = false;
bool EnableDuplex = false;
bool ReservedR19 = false;
+ bool NoreturnStackElim = false;
public:
Hexagon::ArchEnum HexagonArchVersion;
- Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::V4;
+ Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::NoArch;
CodeGenOpt::Level OptLevel;
/// True if the target should use Back-Skip-Back scheduling. This is the
/// default for V60.
@@ -150,6 +152,12 @@ public:
bool hasV65OpsOnly() const {
return getHexagonArchVersion() == Hexagon::ArchEnum::V65;
}
+ bool hasV66Ops() const {
+ return getHexagonArchVersion() >= Hexagon::ArchEnum::V66;
+ }
+ bool hasV66OpsOnly() const {
+ return getHexagonArchVersion() == Hexagon::ArchEnum::V66;
+ }
bool useLongCalls() const { return UseLongCalls; }
bool useMemops() const { return UseMemops; }
@@ -157,8 +165,11 @@ public:
bool useNewValueJumps() const { return UseNewValueJumps; }
bool useNewValueStores() const { return UseNewValueStores; }
bool useSmallData() const { return UseSmallData; }
+ bool useZRegOps() const { return UseZRegOps; }
- bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::V4; }
+ bool useHVXOps() const {
+ return HexagonHVXVersion > Hexagon::ArchEnum::NoArch;
+ }
bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; }
bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; }
@@ -166,6 +177,8 @@ public:
bool hasReservedR19() const { return ReservedR19; }
bool usePredicatedCalls() const;
+ bool noreturnStackElim() const { return NoreturnStackElim; }
+
bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 2c75e9139ad7..ddfda7e27793 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -97,6 +97,10 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
static cl::opt<bool> EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden,
cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization"));
+static cl::opt<bool> EnableInitialCFGCleanup("hexagon-initial-cfg-cleanup",
+ cl::Hidden, cl::ZeroOrMore, cl::init(true),
+ cl::desc("Simplify the CFG after atomic expansion pass"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -149,7 +153,6 @@ namespace llvm {
FunctionPass *createHexagonCopyToCombine();
FunctionPass *createHexagonEarlyIfConversion();
FunctionPass *createHexagonFixupHwLoops();
- FunctionPass *createHexagonGatherPacketize();
FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
FunctionPass *createHexagonGenMux();
@@ -161,7 +164,7 @@ namespace llvm {
FunctionPass *createHexagonNewValueJump();
FunctionPass *createHexagonOptimizeSZextends();
FunctionPass *createHexagonOptAddrMode();
- FunctionPass *createHexagonPacketizer();
+ FunctionPass *createHexagonPacketizer(bool Minimal);
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonRDFOpt();
FunctionPass *createHexagonSplitConst32AndConst64();
@@ -177,12 +180,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
extern "C" void LLVMInitializeHexagonTarget() {
// Register the target.
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
@@ -219,7 +216,8 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
"i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-"
"v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), (HexagonNoOpt ? CodeGenOpt::None : OL)),
+ getEffectiveCodeModel(CM, CodeModel::Small),
+ (HexagonNoOpt ? CodeGenOpt::None : OL)),
TLOF(make_unique<HexagonTargetObjectFile>()) {
initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
initAsmInfo();
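
Both this Hexagon hunk and the Lanai hunk later in the patch delete a file-local getEffectiveCodeModel helper and instead pass the target's default as a second argument to a shared helper. Semantically the shared form reduces to "use the explicit model if one was given, otherwise the default"; a minimal sketch of that behaviour (plain std::optional stands in for LLVM's Optional, and the enum is illustrative):

```cpp
// Minimal sketch of the shared helper's behaviour: use the explicit code
// model if one was given, otherwise fall back to the target's default.
// This mirrors the deleted static helpers; it is not the LLVM declaration.
#include <iostream>
#include <optional>

enum class CodeModel { Small, Medium, Large };

static CodeModel getEffectiveCodeModel(std::optional<CodeModel> CM,
                                       CodeModel Default) {
  return CM ? *CM : Default;
}

int main() {
  bool UsesDefault =
      getEffectiveCodeModel(std::nullopt, CodeModel::Small) == CodeModel::Small;
  std::cout << UsesDefault << '\n';  // 1: no explicit model, default applies
}
```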
@@ -311,7 +309,10 @@ void HexagonPassConfig::addIRPasses() {
}
addPass(createAtomicExpandPass());
+
if (!NoOpt) {
+ if (EnableInitialCFGCleanup)
+ addPass(createCFGSimplificationPass(1, true, true, false, true));
if (EnableLoopPrefetch)
addPass(createLoopDataPrefetchPass());
if (EnableCommGEP)
@@ -402,7 +403,6 @@ void HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonBranchRelaxation());
- // Create Packets.
if (!NoOpt) {
if (!DisableHardwareLoops)
addPass(createHexagonFixupHwLoops());
@@ -411,12 +411,8 @@ void HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonGenMux());
}
- // Create packets for 2 instructions that consitute a gather instruction.
- // Do this regardless of the opt level.
- addPass(createHexagonGatherPacketize(), false);
-
- if (!NoOpt)
- addPass(createHexagonPacketizer(), false);
+ // Packetization is mandatory: it handles gather/scatter at all opt levels.
+ addPass(createHexagonPacketizer(NoOpt), false);
if (EnableVectorPrint)
addPass(createHexagonVectorPrint(), false);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index e771f383dffa..2185bf8eebc6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -199,6 +199,10 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
/// section.
bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM) const {
+ bool HaveSData = isSmallDataEnabled(TM);
+ if (!HaveSData)
+ LLVM_DEBUG(dbgs() << "Small-data allocation is disabled, but symbols "
+ "may have explicit section assignments...\n");
// Only global variables, not functions.
LLVM_DEBUG(dbgs() << "Checking if value is in small-data, -G"
<< SmallDataThreshold << ": \"" << GO->getName() << "\": ");
@@ -218,6 +222,12 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
return IsSmall;
}
+ // If sdata is disabled, stop the checks here.
+ if (!HaveSData) {
+ LLVM_DEBUG(dbgs() << "no, small-data allocation is disabled\n");
+ return false;
+ }
+
if (GVar->isConstant()) {
LLVM_DEBUG(dbgs() << "no, is a constant\n");
return false;
@@ -263,8 +273,9 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
return true;
}
-bool HexagonTargetObjectFile::isSmallDataEnabled() const {
- return SmallDataThreshold > 0;
+bool HexagonTargetObjectFile::isSmallDataEnabled(const TargetMachine &TM)
+ const {
+ return SmallDataThreshold > 0 && !TM.isPositionIndependent();
}
unsigned HexagonTargetObjectFile::getSmallDataSize() const {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
index eff44f097e03..18863630fde2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -29,7 +29,7 @@ namespace llvm {
bool isGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM) const;
- bool isSmallDataEnabled() const;
+ bool isSmallDataEnabled(const TargetMachine &TM) const;
unsigned getSmallDataSize() const;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index a496a17788d5..c942f645aa88 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -54,7 +54,7 @@ bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
return false;
if (ST.isHVXVectorType(VecVT.getSimpleVT()))
return true;
- auto Action = TLI.getPreferredVectorAction(VecVT);
+ auto Action = TLI.getPreferredVectorAction(VecVT.getSimpleVT());
return Action == TargetLoweringBase::TypeWidenVector;
}
@@ -206,9 +206,13 @@ unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace) {
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
+ bool UseMaskForGaps) {
+ if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
+ return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
}
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index a232f99fc407..5c6f85584ec2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -98,6 +98,9 @@ public:
bool prefersVectorizedAddressing() {
return false;
}
+ bool enableInterleavedAccessVectorization() {
+ return true;
+ }
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
@@ -120,7 +123,8 @@ public:
bool VariableMask, unsigned Alignment);
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I);
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 56ab69db9bd1..722699907ca0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -77,7 +77,7 @@ extern cl::opt<bool> ScheduleInlineAsm;
namespace llvm {
-FunctionPass *createHexagonPacketizer();
+FunctionPass *createHexagonPacketizer(bool Minimal);
void initializeHexagonPacketizerPass(PassRegistry&);
} // end namespace llvm
@@ -88,7 +88,8 @@ namespace {
public:
static char ID;
- HexagonPacketizer() : MachineFunctionPass(ID) {}
+ HexagonPacketizer(bool Min = false)
+ : MachineFunctionPass(ID), Minimal(Min) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -112,6 +113,7 @@ namespace {
private:
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
+ const bool Minimal;
};
} // end anonymous namespace
@@ -129,8 +131,9 @@ INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
MachineLoopInfo &MLI, AliasAnalysis *AA,
- const MachineBranchProbabilityInfo *MBPI)
- : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
+ const MachineBranchProbabilityInfo *MBPI, bool Minimal)
+ : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI),
+ Minimal(Minimal) {
HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
@@ -200,9 +203,6 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
auto &HST = MF.getSubtarget<HexagonSubtarget>();
- if (DisablePacketizer || !HST.usePackets() || skipFunction(MF.getFunction()))
- return false;
-
HII = HST.getInstrInfo();
HRI = HST.getRegisterInfo();
auto &MLI = getAnalysis<MachineLoopInfo>();
@@ -213,7 +213,9 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
HII->genAllInsnTimingClasses(MF);
// Instantiate the packetizer.
- HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
+ bool MinOnly = Minimal || DisablePacketizer || !HST.usePackets() ||
+ skipFunction(MF.getFunction());
+ HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI, MinOnly);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
@@ -226,7 +228,7 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
// Here, Insn 1 will result in the dependence graph not emitting an output
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
- for (auto &MB : MF) {
+ for (MachineBasicBlock &MB : MF) {
auto End = MB.end();
auto MI = MB.begin();
while (MI != End) {
@@ -766,7 +768,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
// Make sure that for non-POST_INC stores:
// 1. The only use of reg is DepReg and no other registers.
- // This handles V4 base+index registers.
+ // This handles base+index registers.
// The following store can not be dot new.
// Eg. r0 = add(r0, #3)
// memw(r1+r0<<#2) = r0
@@ -836,11 +838,7 @@ static bool isImplicitDependency(const MachineInstr &I, bool CheckDef,
return false;
}
-// Check to see if an instruction can be dot new
-// There are three kinds.
-// 1. dot new on predicate - V2/V3/V4
-// 2. dot new on stores NV/ST - V4
-// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
+// Check to see if an instruction can be dot new.
bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC) {
@@ -1073,9 +1071,6 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
if (MI.isInlineAsm() && !ScheduleInlineAsm)
return true;
- // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
- // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
- // They must not be grouped with other instructions in a packet.
if (isSchedBarrier(MI))
return true;
@@ -1110,6 +1105,10 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
return MJ.isInlineAsm() || MJ.isBranch() || MJ.isBarrier() ||
MJ.isCall() || MJ.isTerminator();
+ // New-value stores cannot coexist with any other stores.
+ if (HII.isNewValueStore(MI) && MJ.mayStore())
+ return true;
+
switch (MI.getOpcode()) {
case Hexagon::S2_storew_locked:
case Hexagon::S4_stored_locked:
@@ -1283,8 +1282,8 @@ bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I,
return false;
}
-bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr &I,
- const MachineInstr &J) {
+bool HexagonPacketizerList::hasDualStoreDependence(const MachineInstr &I,
+ const MachineInstr &J) {
bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
bool StoreI = I.mayStore(), StoreJ = J.mayStore();
if ((SysI && StoreJ) || (SysJ && StoreI))
@@ -1337,10 +1336,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
if (Dependence)
return false;
- // V4 allows dual stores. It does not allow second store, if the first
- // store is not in SLOT0. New value store, new value jump, dealloc_return
- // and memop always take SLOT0. Arch spec 3.4.4.2.
- Dependence = hasV4SpecificDependence(I, J);
+ // A dual store does not allow a second store if the first store is not
+ // in SLOT0. New value store, new value jump, dealloc_return and memop
+ // always take SLOT0. Arch spec 3.4.4.2.
+ Dependence = hasDualStoreDependence(I, J);
if (Dependence)
return false;
@@ -1499,10 +1498,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
}
// For Order dependences:
- // 1. On V4 or later, volatile loads/stores can be packetized together,
- // unless other rules prevent is.
+ // 1. Volatile loads/stores can be packetized together, unless other
+ // rules prevent it.
// 2. Store followed by a load is not allowed.
- // 3. Store followed by a store is only valid on V4 or later.
+ // 3. Store followed by a store is valid.
// 4. Load followed by any memory operation is allowed.
if (DepType == SDep::Order) {
if (!PacketizeVolatiles) {
@@ -1549,7 +1548,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
continue;
}
- // For V4, special case ALLOCFRAME. Even though there is dependency
+ // Special case for ALLOCFRAME: even though there is a dependency
// between ALLOCFRAME and the subsequent store, allow it to be packetized
// in the same packet. This implies that the store is using the caller's
// SP. Hence, the offset needs to be updated accordingly.
@@ -1569,6 +1568,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
if (GlueAllocframeStore)
continue;
}
+ break;
default:
break;
}
@@ -1652,6 +1652,9 @@ bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
return false;
}
+ if (!Coexist)
+ return false;
+
if (ChangedOffset == INT64_MAX && updateOffset(SUI, SUJ)) {
FoundSequentialDependence = false;
Dependence = false;
@@ -1759,8 +1762,8 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
}
void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator MI) {
- // Replace VLIWPacketizerList::endPacket(MBB, MI).
+ MachineBasicBlock::iterator EndMI) {
+ // Replace VLIWPacketizerList::endPacket(MBB, EndMI).
bool memShufDisabled = getmemShufDisabled();
if (memShufDisabled && !foundLSInPacket()) {
@@ -1769,25 +1772,32 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
}
memShufDisabled = getmemShufDisabled();
- if (CurrentPacketMIs.size() > 1) {
- MachineBasicBlock::instr_iterator FirstMI(CurrentPacketMIs.front());
- MachineBasicBlock::instr_iterator LastMI(MI.getInstrIterator());
- finalizeBundle(*MBB, FirstMI, LastMI);
+ OldPacketMIs.clear();
+ for (MachineInstr *MI : CurrentPacketMIs) {
+ MachineBasicBlock::instr_iterator NextMI = std::next(MI->getIterator());
+ for (auto &I : make_range(HII->expandVGatherPseudo(*MI), NextMI))
+ OldPacketMIs.push_back(&I);
+ }
+ CurrentPacketMIs.clear();
+ if (OldPacketMIs.size() > 1) {
+ MachineBasicBlock::instr_iterator FirstMI(OldPacketMIs.front());
+ MachineBasicBlock::instr_iterator LastMI(EndMI.getInstrIterator());
+ finalizeBundle(*MBB, FirstMI, LastMI);
auto BundleMII = std::prev(FirstMI);
if (memShufDisabled)
HII->setBundleNoShuf(BundleMII);
setmemShufDisabled(false);
}
- OldPacketMIs = CurrentPacketMIs;
- CurrentPacketMIs.clear();
ResourceTracker->clearResources();
LLVM_DEBUG(dbgs() << "End packet\n");
}
bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
+ if (Minimal)
+ return false;
return !producesStall(MI);
}
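
With the gate above, a "minimal" run never lets an instruction join an open packet, so each instruction effectively ends up in its own packet while the pass still runs (for example to expand gather pseudos in endPacket). A toy sketch of that gating effect, with hypothetical names and none of the real stall or dependence logic:

```cpp
// Sketch of the "minimal packetization" gate: when Minimal is set, nothing
// joins an open packet, so every instruction forms its own packet.
// Names are hypothetical, not the LLVM classes.
#include <iostream>
#include <string>
#include <vector>

struct MiniPacketizer {
  bool Minimal;
  std::vector<std::vector<std::string>> Packets;

  // Mirrors the gate above: in minimal mode nothing joins an open packet.
  bool shouldAddToPacket() const { return !Minimal; }

  void run(const std::vector<std::string> &Insns) {
    std::vector<std::string> Cur;
    for (const std::string &I : Insns) {
      if (!Cur.empty() && !shouldAddToPacket()) {
        Packets.push_back(Cur);  // close the current packet first
        Cur.clear();
      }
      Cur.push_back(I);
    }
    if (!Cur.empty())
      Packets.push_back(Cur);
  }
};

int main() {
  MiniPacketizer P{/*Minimal=*/true, {}};
  P.run({"r0 = add(r1,r2)", "r3 = memw(r4)", "memw(r5) = r3"});
  std::cout << P.Packets.size() << " packets\n";  // 3: one instruction each
}
```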
@@ -1860,6 +1870,6 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-FunctionPass *llvm::createHexagonPacketizer() {
- return new HexagonPacketizer();
+FunctionPass *llvm::createHexagonPacketizer(bool Minimal) {
+ return new HexagonPacketizer(Minimal);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index 40dcee3441a2..ca70cf967a46 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -66,11 +66,13 @@ protected:
private:
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
+ const bool Minimal;
public:
HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
AliasAnalysis *AA,
- const MachineBranchProbabilityInfo *MBPI);
+ const MachineBranchProbabilityInfo *MBPI,
+ bool Minimal);
// initPacketizerState - initialize some internal flags.
void initPacketizerState() override;
@@ -147,7 +149,7 @@ protected:
bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J);
bool hasControlDependence(const MachineInstr &I, const MachineInstr &J);
bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J);
- bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J);
+ bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J);
bool producesStall(const MachineInstr &MI);
};
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index cb504b5c3d5d..6543d8313900 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -26,11 +26,9 @@ namespace llvm {
/// instruction info tracks.
namespace HexagonII {
unsigned const TypeCVI_FIRST = TypeCVI_4SLOT_MPY;
- unsigned const TypeCVI_LAST = TypeCVI_VX_LATE;
+ unsigned const TypeCVI_LAST = TypeCVI_ZW;
enum SubTarget {
- HasV4SubT = 0x3f,
- HasV5SubT = 0x3e,
HasV55SubT = 0x3c,
HasV60SubT = 0x38,
};
@@ -57,117 +55,117 @@ namespace HexagonII {
// MCInstrDesc TSFlags
// *** Must match HexagonInstrFormat*.td ***
enum {
- // This 5-bit field describes the insn type.
- TypePos = 0,
- TypeMask = 0x3f,
+ // This 7-bit field describes the insn type.
+ TypePos = 0,
+ TypeMask = 0x7f,
// Solo instructions.
- SoloPos = 6,
+ SoloPos = 7,
SoloMask = 0x1,
// Packed only with A or X-type instructions.
- SoloAXPos = 7,
+ SoloAXPos = 8,
SoloAXMask = 0x1,
// Only A-type instruction in first slot or nothing.
- RestrictSlot1AOKPos = 8,
+ RestrictSlot1AOKPos = 9,
RestrictSlot1AOKMask = 0x1,
// Predicated instructions.
- PredicatedPos = 9,
+ PredicatedPos = 10,
PredicatedMask = 0x1,
- PredicatedFalsePos = 10,
+ PredicatedFalsePos = 11,
PredicatedFalseMask = 0x1,
- PredicatedNewPos = 11,
+ PredicatedNewPos = 12,
PredicatedNewMask = 0x1,
- PredicateLatePos = 12,
+ PredicateLatePos = 13,
PredicateLateMask = 0x1,
// New-Value consumer instructions.
- NewValuePos = 13,
+ NewValuePos = 14,
NewValueMask = 0x1,
// New-Value producer instructions.
- hasNewValuePos = 14,
+ hasNewValuePos = 15,
hasNewValueMask = 0x1,
// Which operand consumes or produces a new value.
- NewValueOpPos = 15,
+ NewValueOpPos = 16,
NewValueOpMask = 0x7,
// Stores that can become new-value stores.
- mayNVStorePos = 18,
+ mayNVStorePos = 19,
mayNVStoreMask = 0x1,
// New-value store instructions.
- NVStorePos = 19,
+ NVStorePos = 20,
NVStoreMask = 0x1,
// Loads that can become current-value loads.
- mayCVLoadPos = 20,
+ mayCVLoadPos = 21,
mayCVLoadMask = 0x1,
// Current-value load instructions.
- CVLoadPos = 21,
+ CVLoadPos = 22,
CVLoadMask = 0x1,
// Extendable insns.
- ExtendablePos = 22,
+ ExtendablePos = 23,
ExtendableMask = 0x1,
// Insns must be extended.
- ExtendedPos = 23,
+ ExtendedPos = 24,
ExtendedMask = 0x1,
// Which operand may be extended.
- ExtendableOpPos = 24,
+ ExtendableOpPos = 25,
ExtendableOpMask = 0x7,
// Signed or unsigned range.
- ExtentSignedPos = 27,
+ ExtentSignedPos = 28,
ExtentSignedMask = 0x1,
// Number of bits of range before extending operand.
- ExtentBitsPos = 28,
+ ExtentBitsPos = 29,
ExtentBitsMask = 0x1f,
// Alignment power-of-two before extending operand.
- ExtentAlignPos = 33,
+ ExtentAlignPos = 34,
ExtentAlignMask = 0x3,
- CofMax1Pos = 35,
+ CofMax1Pos = 36,
CofMax1Mask = 0x1,
- CofRelax1Pos = 36,
+ CofRelax1Pos = 37,
CofRelax1Mask = 0x1,
- CofRelax2Pos = 37,
+ CofRelax2Pos = 38,
CofRelax2Mask = 0x1,
- RestrictNoSlot1StorePos = 38,
+ RestrictNoSlot1StorePos = 39,
RestrictNoSlot1StoreMask = 0x1,
// Addressing mode for load/store instructions.
- AddrModePos = 41,
+ AddrModePos = 42,
AddrModeMask = 0x7,
// Access size for load/store instructions.
- MemAccessSizePos = 44,
+ MemAccessSizePos = 45,
MemAccesSizeMask = 0xf,
// Branch predicted taken.
- TakenPos = 48,
+ TakenPos = 49,
TakenMask = 0x1,
// Floating-point instructions.
- FPPos = 49,
+ FPPos = 50,
FPMask = 0x1,
// New-Value producer-2 instructions.
- hasNewValuePos2 = 51,
+ hasNewValuePos2 = 52,
hasNewValueMask2 = 0x1,
// Which operand consumes or produces a new value.
- NewValueOpPos2 = 52,
+ NewValueOpPos2 = 53,
NewValueOpMask2 = 0x7,
// Accumulator instructions.
- AccumulatorPos = 55,
+ AccumulatorPos = 56,
AccumulatorMask = 0x1,
// Complex XU, prevent xu competition by preferring slot3
- PrefersSlot3Pos = 56,
+ PrefersSlot3Pos = 57,
PrefersSlot3Mask = 0x1,
// v65
- HasTmpDstPos = 59,
+ HasTmpDstPos = 60,
HasTmpDstMask = 0x1,
- CVINewPos = 61,
- CVINewMask = 0x1
+ CVINewPos = 62,
+ CVINewMask = 0x1,
};
// *** The code above must match HexagonInstrFormat*.td *** //
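
Every Pos/Mask pair in the enum above is consumed the same way: shift the 64-bit TSFlags word right by Pos and mask off the field. Widening the type field (mask 0x3f to 0x7f) is what pushes most of the later positions up by one. A small standalone sketch of that decoding, using two of the post-patch fields; the TSFlags value itself is made up:

```cpp
// Sketch of how the Pos/Mask pairs are used: each field of the 64-bit
// TSFlags word is read as (TSFlags >> Pos) & Mask. Positions mirror the
// post-patch layout (7-bit type field); the encoded word is invented.
#include <cstdint>
#include <iostream>

enum : uint64_t {
  TypePos = 0, TypeMask = 0x7f,  // now 7 bits wide
  SoloPos = 7, SoloMask = 0x1,   // shifted up by one accordingly
};

static unsigned getType(uint64_t TSFlags) {
  return (TSFlags >> TypePos) & TypeMask;
}
static bool isSolo(uint64_t TSFlags) {
  return (TSFlags >> SoloPos) & SoloMask;
}

int main() {
  uint64_t TSFlags = (0x2aULL << TypePos) | (1ULL << SoloPos);  // made up
  std::cout << "type=" << getType(TSFlags)
            << " solo=" << isSolo(TSFlags) << '\n';  // type=42 solo=1
}
```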
@@ -176,7 +174,7 @@ namespace HexagonII {
enum HexagonMOTargetFlagVal {
// Hexagon-specific MachineOperand target flags.
//
- // When chaning these, make sure to update
+ // When changing these, make sure to update
// getSerializableDirectMachineOperandTargetFlags and
// getSerializableBitmaskMachineOperandTargetFlags if needed.
MO_NO_FLAG,
@@ -189,7 +187,8 @@ namespace HexagonII {
MO_GOT,
// Low or high part of a symbol.
- MO_LO16, MO_HI16,
+ MO_LO16,
+ MO_HI16,
// Offset from the base of the SDA.
MO_GPREL,
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 3b3a15b990f1..687e79a7dbab 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -28,26 +28,8 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "HexagonGenAsmWriter.inc"
-HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI,
- MCInstrInfo const &MII,
- MCRegisterInfo const &MRI)
- : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) {
-}
-
-StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
- return MII.getName(Opcode);
-}
-
void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
- O << getRegName(RegNo);
-}
-
-StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
- return getRegisterName(RegNo);
-}
-
-void HexagonInstPrinter::setExtender(MCInst const &MCI) {
- HasExtender = HexagonMCInstrInfo::isImmext(MCI);
+ O << getRegisterName(RegNo);
}
void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
@@ -65,7 +47,7 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
printInstruction(MCI.getOperand(0).getInst(), OS);
} else
printInstruction(&MCI, OS);
- setExtender(MCI);
+ HasExtender = HexagonMCInstrInfo::isImmext(MCI);
OS << "\n";
}
@@ -97,72 +79,6 @@ void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo,
}
}
-void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI,
- unsigned OpNo,
- raw_ostream &O) const {
- O << MI->getOperand(OpNo).getImm();
-}
-
-void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- O << -MI->getOperand(OpNo).getImm();
-}
-
-void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- O << -1;
-}
-
-void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
-
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
-
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- // Branches can take an immediate operand. This is used by the branch
- // selection pass to print $+8, an eight byte displacement from the PC.
- llvm_unreachable("Unknown branch operand.");
-}
-
-void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {}
-
-void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {}
-
-void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {}
-
-void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo,
- raw_ostream &O, bool hi) const {
- assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand");
-
- O << '#' << (hi ? "HI" : "LO") << '(';
- O << '#';
- printOperand(MI, OpNo, O);
- O << ')';
-}
-
void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
MCOperand const &MO = MI->getOperand(OpNo);
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index ac8e391905e0..17af046ce090 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -26,57 +26,25 @@ namespace llvm {
class HexagonInstPrinter : public MCInstPrinter {
public:
explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII,
- MCRegisterInfo const &MRI);
+ MCRegisterInfo const &MRI)
+ : MCInstPrinter(MAI, MII, MRI), MII(MII) {}
+
void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
- virtual StringRef getOpcodeName(unsigned Opcode) const;
- void printInstruction(MCInst const *MI, raw_ostream &O);
+ void printRegName(raw_ostream &O, unsigned RegNo) const override;
- StringRef getRegName(unsigned RegNo) const;
static char const *getRegisterName(unsigned RegNo);
- void printRegName(raw_ostream &O, unsigned RegNo) const override;
+ void printInstruction(MCInst const *MI, raw_ostream &O);
void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printNegImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printNOneImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printBranchOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printPredicateOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printGlobalOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
-
- void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
- printSymbol(MI, OpNo, O, true);
- }
- void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
- printSymbol(MI, OpNo, O, false);
- }
-
MCAsmInfo const &getMAI() const { return MAI; }
MCInstrInfo const &getMII() const { return MII; }
-protected:
- void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O,
- bool hi) const;
-
private:
MCInstrInfo const &MII;
-
- bool HasExtender;
- void setExtender(MCInst const &MCI);
+ bool HasExtender = false;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index b208a3668124..f0654d612b4b 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -127,6 +127,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x4;
}
+ break;
case HexagonII::HSIG_L2:
switch (Gb) {
default:
@@ -138,6 +139,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x5;
}
+ break;
case HexagonII::HSIG_S1:
switch (Gb) {
default:
@@ -151,6 +153,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x6;
}
+ break;
case HexagonII::HSIG_S2:
switch (Gb) {
default:
@@ -166,6 +169,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x7;
}
+ break;
case HexagonII::HSIG_A:
switch (Gb) {
default:
@@ -173,11 +177,13 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x3;
}
+ break;
case HexagonII::HSIG_Compound:
switch (Gb) {
case HexagonII::HSIG_Compound:
return 0xFFFFFFFF;
}
+ break;
}
return 0xFFFFFFFF;
}
@@ -634,8 +640,7 @@ bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
return false;
}
- if (STI.getCPU().equals_lower("hexagonv4") ||
- STI.getCPU().equals_lower("hexagonv5") ||
+ if (STI.getCPU().equals_lower("hexagonv5") ||
STI.getCPU().equals_lower("hexagonv55") ||
STI.getCPU().equals_lower("hexagonv60")) {
// If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
index 9fbe299d7d52..f0689252b396 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -9,11 +9,11 @@
//===----------------------------------------------------------------------===//
#include "HexagonMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/ELF.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index b211a81524fb..92ce7345f358 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -61,8 +61,6 @@ cl::opt<bool> llvm::HexagonDisableDuplex
cl::desc("Disable looking for duplex instructions for Hexagon"));
namespace { // These flags are to be deprecated
-cl::opt<bool> MV4("mv4", cl::Hidden, cl::desc("Build for Hexagon V4"),
- cl::init(false));
cl::opt<bool> MV5("mv5", cl::Hidden, cl::desc("Build for Hexagon V5"),
cl::init(false));
cl::opt<bool> MV55("mv55", cl::Hidden, cl::desc("Build for Hexagon V55"),
@@ -73,6 +71,8 @@ cl::opt<bool> MV62("mv62", cl::Hidden, cl::desc("Build for Hexagon V62"),
cl::init(false));
cl::opt<bool> MV65("mv65", cl::Hidden, cl::desc("Build for Hexagon V65"),
cl::init(false));
+cl::opt<bool> MV66("mv66", cl::Hidden, cl::desc("Build for Hexagon V66"),
+ cl::init(false));
} // namespace
cl::opt<Hexagon::ArchEnum>
@@ -82,19 +82,20 @@ cl::opt<Hexagon::ArchEnum>
clEnumValN(Hexagon::ArchEnum::V60, "v60", "Build for HVX v60"),
clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"),
clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"),
- // Sentinal for no value specified
- clEnumValN(Hexagon::ArchEnum::V5, "", "")),
- // Sentinal for flag not present
- cl::init(Hexagon::ArchEnum::V4), cl::ValueOptional);
+ clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"),
+ // Sentinel for no value specified.
+ clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
+ // Sentinel for flag not present.
+ cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional);
+
static cl::opt<bool>
- DisableHVX("mno-hvx", cl::Hidden, cl::desc("Disable Hexagon Vector eXtensions"));
+ DisableHVX("mno-hvx", cl::Hidden,
+ cl::desc("Disable Hexagon Vector eXtensions"));
static StringRef DefaultArch = "hexagonv60";
static StringRef HexagonGetArchVariant() {
- if (MV4)
- return "hexagonv4";
if (MV5)
return "hexagonv5";
if (MV55)
@@ -105,6 +106,8 @@ static StringRef HexagonGetArchVariant() {
return "hexagonv62";
if (MV65)
return "hexagonv65";
+ if (MV66)
+ return "hexagonv66";
return "";
}
@@ -123,7 +126,7 @@ StringRef Hexagon_MC::selectHexagonCPU(StringRef CPU) {
return ArchV;
}
-unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV4FU::SLOT3; }
+unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV5FU::SLOT3; }
namespace {
@@ -279,6 +282,7 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
Result.push_back(FS);
switch (EnableHVX) {
+ case Hexagon::ArchEnum::V5:
case Hexagon::ArchEnum::V55:
break;
case Hexagon::ArchEnum::V60:
@@ -290,14 +294,18 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
case Hexagon::ArchEnum::V65:
Result.push_back("+hvxv65");
break;
- case Hexagon::ArchEnum::V5:{
+ case Hexagon::ArchEnum::V66:
+ Result.push_back("+hvxv66");
+ break;
+ case Hexagon::ArchEnum::Generic:{
Result.push_back(StringSwitch<StringRef>(CPU)
.Case("hexagonv60", "+hvxv60")
.Case("hexagonv62", "+hvxv62")
- .Case("hexagonv65", "+hvxv65"));
+ .Case("hexagonv65", "+hvxv65")
+ .Case("hexagonv66", "+hvxv66"));
break;
}
- case Hexagon::ArchEnum::V4:
+ case Hexagon::ArchEnum::NoArch:
// Sentinel if -mhvx isn't specified
break;
}
@@ -307,15 +315,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
static bool isCPUValid(std::string CPU)
{
- std::vector<std::string> table
- {
- "generic",
- "hexagonv4",
- "hexagonv5",
- "hexagonv55",
- "hexagonv60",
- "hexagonv62",
- "hexagonv65",
+ std::vector<std::string> table {
+ "generic", "hexagonv5", "hexagonv55", "hexagonv60",
+ "hexagonv62", "hexagonv65", "hexagonv66",
};
return std::find(table.begin(), table.end(), CPU) != table.end();
@@ -336,8 +338,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// Make sure that +hvx-length turns hvx on, and that "hvx" alone
// turns on hvxvNN, corresponding to the existing ArchVNN.
FeatureBitset FB = S;
- unsigned CpuArch = ArchV4;
- for (unsigned F : {ArchV65, ArchV62, ArchV60, ArchV55, ArchV5, ArchV4}) {
+ unsigned CpuArch = ArchV5;
+ for (unsigned F : {ArchV66, ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
if (!FB.test(F))
continue;
CpuArch = F;
@@ -351,7 +353,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
break;
}
bool HasHvxVer = false;
- for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65}) {
+ for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65,
+ ExtensionHVXV66}) {
if (!FB.test(F))
continue;
HasHvxVer = true;
@@ -364,6 +367,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// HasHvxVer is false, and UseHvx is true.
switch (CpuArch) {
+ case ArchV66:
+ FB.set(ExtensionHVXV66);
+ LLVM_FALLTHROUGH;
case ArchV65:
FB.set(ExtensionHVXV65);
LLVM_FALLTHROUGH;
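
The switch above (together with the unchanged cases below it) relies on deliberate fallthrough, so selecting a newer CPU architecture also sets the HVX extension bit of every older version. A standalone sketch of that cascade, with made-up enum values rather than LLVM's generated feature indices:

```cpp
// Sketch of the fallthrough cascade: enabling HVX for a newer architecture
// also sets the feature bits of all older HVX versions.
#include <bitset>
#include <iostream>

enum Arch { V60, V62, V65, V66 };
enum Feature { HVXV60, HVXV62, HVXV65, HVXV66, NumFeatures };

static std::bitset<NumFeatures> hvxFeaturesFor(Arch A) {
  std::bitset<NumFeatures> FB;
  switch (A) {
  case V66: FB.set(HVXV66); [[fallthrough]];
  case V65: FB.set(HVXV65); [[fallthrough]];
  case V62: FB.set(HVXV62); [[fallthrough]];
  case V60: FB.set(HVXV60); break;
  }
  return FB;
}

int main() {
  std::cout << hvxFeaturesFor(V65) << '\n';  // HVXV60..V65 set, V66 clear
}
```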
@@ -402,12 +408,12 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
static std::map<StringRef,unsigned> ElfFlags = {
- {"hexagonv4", ELF::EF_HEXAGON_MACH_V4},
{"hexagonv5", ELF::EF_HEXAGON_MACH_V5},
{"hexagonv55", ELF::EF_HEXAGON_MACH_V55},
{"hexagonv60", ELF::EF_HEXAGON_MACH_V60},
{"hexagonv62", ELF::EF_HEXAGON_MACH_V62},
{"hexagonv65", ELF::EF_HEXAGON_MACH_V65},
+ {"hexagonv66", ELF::EF_HEXAGON_MACH_V66},
};
auto F = ElfFlags.find(STI.getCPU());
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 6cd1b3a4691f..d6ea664222d3 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -18,6 +18,33 @@
#include <cstdint>
#include <string>
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+// Normal instruction size (in bytes).
+#define HEXAGON_INSTR_SIZE 4
+
+// Maximum number of words and instructions in a packet.
+#define HEXAGON_PACKET_SIZE 4
+#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
+// Minimum number of instructions in an end-loop packet.
+#define HEXAGON_PACKET_INNER_SIZE 2
+#define HEXAGON_PACKET_OUTER_SIZE 3
+// Maximum number of instructions in a packet before shuffling,
+// including a compound one or a duplex or an extender.
+#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)
+
+// Name of the global offset table as defined by the Hexagon ABI
+#define HEXAGON_GOT_SYM_NAME "_GLOBAL_OFFSET_TABLE_"
+
namespace llvm {
struct InstrItinerary;
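
The constants added above encode a few simple relationships: a packet holds up to 4 instruction words of 4 bytes each, and the pre-shuffle limit leaves room for 3 extra slots (a compound, a duplex, or an extender). A quick check of the derived values:

```cpp
// Quick check of the derived packet-size constants defined above.
#include <iostream>

#define HEXAGON_INSTR_SIZE 4
#define HEXAGON_PACKET_SIZE 4
#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)

int main() {
  std::cout << "max packet bytes: " << HEXAGON_MAX_PACKET_SIZE << '\n';        // 16
  std::cout << "pre-shuffle insns: " << HEXAGON_PRESHUFFLE_PACKET_SIZE << '\n'; // 7
}
```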
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 59f3caa6af94..f4ee2bbfaaaa 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -138,6 +138,8 @@ void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
(*TUL)[HexagonII::TypeCVI_SCATTER_NEW_ST] =
UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_4SLOT_MPY] = UnitsAndLanes(CVI_XLANE, 4);
+ (*TUL)[HexagonII::TypeCVI_ZW] = UnitsAndLanes(CVI_ZW, 1);
}
HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL,
@@ -300,6 +302,7 @@ bool HexagonShuffler::check() {
// Number of memory operations, loads, solo loads, stores, solo stores, single
// stores.
unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
+ unsigned NonZCVIloads = 0, AllCVIloads = 0, CVIstores = 0;
// Number of duplex insns
unsigned duplex = 0;
unsigned pSlot3Cnt = 0;
@@ -331,6 +334,11 @@ bool HexagonShuffler::check() {
case HexagonII::TypeCVI_VM_TMP_LD:
case HexagonII::TypeCVI_GATHER:
case HexagonII::TypeCVI_GATHER_RST:
+ ++NonZCVIloads;
+ LLVM_FALLTHROUGH;
+ case HexagonII::TypeCVI_ZW:
+ ++AllCVIloads;
+ LLVM_FALLTHROUGH;
case HexagonII::TypeLD:
++loads;
++memory;
@@ -348,6 +356,8 @@ bool HexagonShuffler::check() {
case HexagonII::TypeCVI_SCATTER_RST:
case HexagonII::TypeCVI_SCATTER_NEW_RST:
case HexagonII::TypeCVI_SCATTER_NEW_ST:
+ ++CVIstores;
+ LLVM_FALLTHROUGH;
case HexagonII::TypeST:
++stores;
++memory;
@@ -405,7 +415,11 @@ bool HexagonShuffler::check() {
applySlotRestrictions();
// Check if the packet is legal.
- if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory))) {
+ const unsigned ZCVIloads = AllCVIloads - NonZCVIloads;
+ const bool ValidHVXMem =
+ NonZCVIloads <= 1 && ZCVIloads <= 1 && CVIstores <= 1;
+ if ((load0 > 1 || store0 > 1 || !ValidHVXMem) ||
+ (duplex > 1 || (duplex && memory))) {
reportError(llvm::Twine("invalid instruction packet"));
return false;
}
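
The new counters split CVI loads into Z-register (TypeCVI_ZW) loads and the rest, and the packet is rejected unless it contains at most one of each plus at most one CVI store. A standalone restatement of that predicate:

```cpp
// Sketch of the new HVX memory legality check: a packet may carry at most
// one non-Z CVI load, one Z (zw) CVI load, and one CVI store.
#include <iostream>

static bool validHVXMem(unsigned NonZCVIloads, unsigned AllCVIloads,
                        unsigned CVIstores) {
  const unsigned ZCVIloads = AllCVIloads - NonZCVIloads;
  return NonZCVIloads <= 1 && ZCVIloads <= 1 && CVIstores <= 1;
}

int main() {
  std::cout << validHVXMem(1, 2, 1) << '\n';  // 1: one regular + one zw load
  std::cout << validHVXMem(2, 2, 0) << '\n';  // 0: two regular CVI loads
}
```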
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 37f90bc46ac7..ef50c5bebbfb 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -75,7 +75,8 @@ private:
CVI_XLANE = 1 << 0,
CVI_SHIFT = 1 << 1,
CVI_MPY0 = 1 << 2,
- CVI_MPY1 = 1 << 3
+ CVI_MPY1 = 1 << 3,
+ CVI_ZW = 1 << 4
};
// Count of adjacent slots that the insn requires to be executed.
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
index da339bfd3ff4..8dcd485d65e9 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -214,7 +214,7 @@ bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) {
return false;
return A.Id < B.Id;
};
- llvm::sort(DRNs.begin(), DRNs.end(), UsesFirst);
+ llvm::sort(DRNs, UsesFirst);
if (trace())
dbgs() << "Removing dead ref nodes:\n";
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
index 3d1ec31dada7..d8ca08e70505 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
@@ -1471,7 +1471,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
// and add a def for each S in the closure.
// Sort the refs so that the phis will be created in a deterministic order.
- llvm::sort(MaxRefs.begin(), MaxRefs.end());
+ llvm::sort(MaxRefs);
// Remove duplicates.
auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
MaxRefs.erase(NewEnd, MaxRefs.end());
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
index c257d754ddf9..9ff48d25a026 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
@@ -207,7 +207,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
};
std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
- llvm::sort(Tmp.begin(), Tmp.end(), Less);
+ llvm::sort(Tmp, Less);
// The vector is a list of instructions, so that defs coming from
// the same instruction don't need to be artificially ordered.
@@ -813,7 +813,7 @@ void Liveness::computeLiveIns() {
std::vector<RegisterRef> LV;
for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
LV.push_back(RegisterRef(I->PhysReg, I->LaneMask));
- llvm::sort(LV.begin(), LV.end());
+ llvm::sort(LV);
dbgs() << printMBBReference(B) << "\t rec = {";
for (auto I : LV)
dbgs() << ' ' << Print<RegisterRef>(I, DFG);
@@ -824,7 +824,7 @@ void Liveness::computeLiveIns() {
const RegisterAggr &LG = LiveMap[&B];
for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I)
LV.push_back(*I);
- llvm::sort(LV.begin(), LV.end());
+ llvm::sort(LV);
dbgs() << "\tcomp = {";
for (auto I : LV)
dbgs() << ' ' << Print<RegisterRef>(I, DFG);
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index 045a897c4126..0411704be6fb 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -1498,8 +1498,8 @@ void LanaiTargetLowering::computeKnownBitsForTargetNode(
break;
case LanaiISD::SELECT_CC:
KnownBits Known2;
- DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
- DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 493d02bef37c..196768fdc56a 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -101,12 +101,12 @@ bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint(
// the width doesn't overlap the offset of a higher memory access,
// then the memory accesses are different.
const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned BaseRegA = 0, BaseRegB = 0;
+ MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned int WidthA = 0, WidthB = 0;
- if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
- getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
- if (BaseRegA == BaseRegB) {
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = std::min(OffsetA, OffsetB);
int HighOffset = std::max(OffsetA, OffsetB);
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
@@ -733,8 +733,13 @@ unsigned LanaiInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- const MachineMemOperand *Dummy;
- return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (hasLoadFromStackSlot(MI, Accesses)){
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return 1;
+ }
}
return 0;
}
@@ -750,9 +755,9 @@ unsigned LanaiInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return 0;
}
-bool LanaiInstrInfo::getMemOpBaseRegImmOfsWidth(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo * /*TRI*/) const {
+bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
+ MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ unsigned &Width, const TargetRegisterInfo * /*TRI*/) const {
// Handle only loads/stores with base register followed by immediate offset
// and with add as ALU op.
if (LdSt.getNumOperands() != 4)
@@ -782,14 +787,17 @@ bool LanaiInstrInfo::getMemOpBaseRegImmOfsWidth(
break;
}
- BaseReg = LdSt.getOperand(1).getReg();
+ BaseOp = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
-bool LanaiInstrInfo::getMemOpBaseRegImmOfs(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool LanaiInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
+ MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
switch (LdSt.getOpcode()) {
default:
return false;
@@ -803,6 +811,6 @@ bool LanaiInstrInfo::getMemOpBaseRegImmOfs(
case Lanai::LDBs_RI:
case Lanai::LDBz_RI:
unsigned Width;
- return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
+ return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
}
}
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index fe22fde2470b..bdcf9a361b5f 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -68,13 +68,13 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
- bool getMemOpBaseRegImmOfsWidth(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo *TRI) const;
+ bool getMemOperandWithOffsetWidth(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset, unsigned &Width,
+ const TargetRegisterInfo *TRI) const;
std::pair<unsigned, unsigned>
decomposeMachineOperandsTargetFlags(unsigned TF) const override;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
index 35e2542dfb13..54500b0e52e3 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
@@ -277,8 +277,7 @@ void LanaiMemAluCombiner::insertMergedInstruction(MachineBasicBlock *BB,
InstrBuilder.addImm(LPAC::makePostOp(AluOpcode));
// Transfer memory operands.
- InstrBuilder->setMemRefs(MemInstr->memoperands_begin(),
- MemInstr->memoperands_end());
+ InstrBuilder.setMemRefs(MemInstr->memoperands());
}
// Function determines if ALU operation (in alu_iter) can be combined with
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
index 2c21a53b13bb..10bd9e2c65d2 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -53,12 +53,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Medium;
-}
-
LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT,
StringRef Cpu, StringRef FeatureString,
const TargetOptions &Options,
@@ -67,7 +61,8 @@ LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OptLevel, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(), TT, Cpu, FeatureString, Options,
getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CodeModel), OptLevel),
+ getEffectiveCodeModel(CodeModel, CodeModel::Medium),
+ OptLevel),
Subtarget(TT, Cpu, FeatureString, *this, Options, getCodeModel(),
OptLevel),
TLOF(new LanaiTargetObjectFile()) {
diff --git a/contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
new file mode 100644
index 000000000000..1ad70ac72c73
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -0,0 +1,580 @@
+//===- MSP430AsmParser.cpp - Parse MSP430 assembly to MCInst instructions -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430RegisterInfo.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define DEBUG_TYPE "msp430-asm-parser"
+
+using namespace llvm;
+
+namespace {
+
+/// Parses MSP430 assembly from a stream.
+class MSP430AsmParser : public MCTargetAsmParser {
+ const MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+ const MCRegisterInfo *MRI;
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ bool ParseDirective(AsmToken DirectiveID) override;
+ bool ParseDirectiveRefSym(AsmToken DirectiveID);
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
+
+ bool parseJccInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands);
+
+ bool ParseOperand(OperandVector &Operands);
+
+ bool ParseLiteralValues(unsigned Size, SMLoc L);
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ /// @name Auto-generated Matcher Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "MSP430GenAsmMatcher.inc"
+
+ /// }
+
+public:
+ MSP430AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI, MII), STI(STI), Parser(Parser) {
+ MCAsmParserExtension::Initialize(Parser);
+ MRI = getContext().getRegisterInfo();
+
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+};
+
+/// A parsed MSP430 assembly operand.
+class MSP430Operand : public MCParsedAsmOperand {
+ typedef MCParsedAsmOperand Base;
+
+ enum KindTy {
+ k_Imm,
+ k_Reg,
+ k_Tok,
+ k_Mem,
+ k_IndReg,
+ k_PostIndReg
+ } Kind;
+
+ struct Memory {
+ unsigned Reg;
+ const MCExpr *Offset;
+ };
+ union {
+ const MCExpr *Imm;
+ unsigned Reg;
+ StringRef Tok;
+ Memory Mem;
+ };
+
+ SMLoc Start, End;
+
+public:
+ MSP430Operand(StringRef Tok, SMLoc const &S)
+ : Base(), Kind(k_Tok), Tok(Tok), Start(S), End(S) {}
+ MSP430Operand(KindTy Kind, unsigned Reg, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(Kind), Reg(Reg), Start(S), End(E) {}
+ MSP430Operand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(k_Imm), Imm(Imm), Start(S), End(E) {}
+ MSP430Operand(unsigned Reg, MCExpr const *Expr, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(k_Mem), Mem({Reg, Expr}), Start(S), End(E) {}
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert((Kind == k_Reg || Kind == k_IndReg || Kind == k_PostIndReg) &&
+ "Unexpected operand kind");
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ }
+
+ void addExprOperand(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediate when possible
+ if (!Expr)
+ Inst.addOperand(MCOperand::createImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(Kind == k_Imm && "Unexpected operand kind");
+ assert(N == 1 && "Invalid number of operands!");
+
+ addExprOperand(Inst, Imm);
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert(Kind == k_Mem && "Unexpected operand kind");
+ assert(N == 2 && "Invalid number of operands");
+
+ Inst.addOperand(MCOperand::createReg(Mem.Reg));
+ addExprOperand(Inst, Mem.Offset);
+ }
+
+ bool isReg() const { return Kind == k_Reg; }
+ bool isImm() const { return Kind == k_Imm; }
+ bool isToken() const { return Kind == k_Tok; }
+ bool isMem() const { return Kind == k_Mem; }
+ bool isIndReg() const { return Kind == k_IndReg; }
+ bool isPostIndReg() const { return Kind == k_PostIndReg; }
+
+ bool isCGImm() const {
+ if (Kind != k_Imm)
+ return false;
+
+ int64_t Val;
+ if (!Imm->evaluateAsAbsolute(Val))
+ return false;
+
+ if (Val == 0 || Val == 1 || Val == 2 || Val == 4 || Val == 8 || Val == -1)
+ return true;
+
+ return false;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == k_Tok && "Invalid access!");
+ return Tok;
+ }
+
+ unsigned getReg() const {
+ assert(Kind == k_Reg && "Invalid access!");
+ return Reg;
+ }
+
+ void setReg(unsigned RegNo) {
+ assert(Kind == k_Reg && "Invalid access!");
+ Reg = RegNo;
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateToken(StringRef Str, SMLoc S) {
+ return make_unique<MSP430Operand>(Str, S);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(k_Reg, RegNum, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(Val, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateMem(unsigned RegNum,
+ const MCExpr *Val,
+ SMLoc S, SMLoc E) {
+ return make_unique<MSP430Operand>(RegNum, Val, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateIndReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(k_IndReg, RegNum, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreatePostIndReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(k_PostIndReg, RegNum, S, E);
+ }
+
+ SMLoc getStartLoc() const { return Start; }
+ SMLoc getEndLoc() const { return End; }
+
+ virtual void print(raw_ostream &O) const {
+ switch (Kind) {
+ case k_Tok:
+ O << "Token " << Tok;
+ break;
+ case k_Reg:
+ O << "Register " << Reg;
+ break;
+ case k_Imm:
+ O << "Immediate " << *Imm;
+ break;
+ case k_Mem:
+ O << "Memory ";
+      O << *Mem.Offset << "(" << Mem.Reg << ")";
+ break;
+ case k_IndReg:
+ O << "RegInd " << Reg;
+ break;
+ case k_PostIndReg:
+ O << "PostInc " << Reg;
+ break;
+ }
+ }
+};
+} // end anonymous namespace
+
+bool MSP430AsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+
+ switch (MatchResult) {
+ case Match_Success:
+ Inst.setLoc(Loc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MnemonicFail:
+ return Error(Loc, "invalid instruction mnemonic");
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = Loc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(ErrorLoc, "too few operands for instruction");
+
+ ErrorLoc = ((MSP430Operand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = Loc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ default:
+ return true;
+ }
+}
+
+// Auto-generated by TableGen
+static unsigned MatchRegisterName(StringRef Name);
+static unsigned MatchRegisterAltName(StringRef Name);
+
+bool MSP430AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ auto Name = getLexer().getTok().getIdentifier().lower();
+ RegNo = MatchRegisterName(Name);
+ if (RegNo == MSP430::NoRegister) {
+ RegNo = MatchRegisterAltName(Name);
+ if (RegNo == MSP430::NoRegister)
+ return true;
+ }
+
+ AsmToken const &T = getParser().getTok();
+ StartLoc = T.getLoc();
+ EndLoc = T.getEndLoc();
+ getLexer().Lex(); // eat register token
+
+ return false;
+ }
+
+ return Error(StartLoc, "invalid register name");
+}
+
+bool MSP430AsmParser::parseJccInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+ if (!Name.startswith_lower("j"))
+ return true;
+
+ auto CC = Name.drop_front().lower();
+ unsigned CondCode;
+ if (CC == "ne" || CC == "nz")
+ CondCode = MSP430CC::COND_NE;
+ else if (CC == "eq" || CC == "z")
+ CondCode = MSP430CC::COND_E;
+ else if (CC == "lo" || CC == "nc")
+ CondCode = MSP430CC::COND_LO;
+ else if (CC == "hs" || CC == "c")
+ CondCode = MSP430CC::COND_HS;
+ else if (CC == "n")
+ CondCode = MSP430CC::COND_N;
+ else if (CC == "ge")
+ CondCode = MSP430CC::COND_GE;
+ else if (CC == "l")
+ CondCode = MSP430CC::COND_L;
+ else if (CC == "mp")
+ CondCode = MSP430CC::COND_NONE;
+ else
+ return Error(NameLoc, "unknown instruction");
+
+ if (CondCode == (unsigned)MSP430CC::COND_NONE)
+ Operands.push_back(MSP430Operand::CreateToken("jmp", NameLoc));
+ else {
+ Operands.push_back(MSP430Operand::CreateToken("j", NameLoc));
+ const MCExpr *CCode = MCConstantExpr::create(CondCode, getContext());
+ Operands.push_back(MSP430Operand::CreateImm(CCode, SMLoc(), SMLoc()));
+ }
+
+ // Skip optional '$' sign.
+ if (getLexer().getKind() == AsmToken::Dollar)
+ getLexer().Lex(); // Eat '$'
+
+ const MCExpr *Val;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (getParser().parseExpression(Val))
+ return Error(ExprLoc, "expected expression operand");
+
+ int64_t Res;
+ if (Val->evaluateAsAbsolute(Res))
+ if (Res < -512 || Res > 511)
+ return Error(ExprLoc, "invalid jump offset");
+
+ Operands.push_back(MSP430Operand::CreateImm(Val, ExprLoc,
+ getLexer().getLoc()));
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ getParser().eatToEndOfStatement();
+ return Error(Loc, "unexpected token");
+ }
+
+ getParser().Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MSP430AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+ // Drop .w suffix
+ if (Name.endswith_lower(".w"))
+ Name = Name.drop_back(2);
+
+ if (!parseJccInstruction(Info, Name, NameLoc, Operands))
+ return false;
+
+ // First operand is instruction mnemonic
+ Operands.push_back(MSP430Operand::CreateToken(Name, NameLoc));
+
+ // If there are no more operands, then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse first operand
+ if (ParseOperand(Operands))
+ return true;
+
+ // Parse second operand if any
+ if (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat ','
+ if (ParseOperand(Operands))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ getParser().eatToEndOfStatement();
+ return Error(Loc, "unexpected token");
+ }
+
+ getParser().Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MSP430AsmParser::ParseDirectiveRefSym(AsmToken DirectiveID) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ getStreamer().EmitSymbolAttribute(Sym, MCSA_Global);
+ return false;
+}
+
+bool MSP430AsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal.lower() == ".long") {
+ ParseLiteralValues(4, DirectiveID.getLoc());
+ } else if (IDVal.lower() == ".word" || IDVal.lower() == ".short") {
+ ParseLiteralValues(2, DirectiveID.getLoc());
+ } else if (IDVal.lower() == ".byte") {
+ ParseLiteralValues(1, DirectiveID.getLoc());
+ } else if (IDVal.lower() == ".refsym") {
+ return ParseDirectiveRefSym(DirectiveID);
+ }
+ return true;
+}
+
+bool MSP430AsmParser::ParseOperand(OperandVector &Operands) {
+ switch (getLexer().getKind()) {
+ default: return true;
+ case AsmToken::Identifier: {
+ // try rN
+ unsigned RegNo;
+ SMLoc StartLoc, EndLoc;
+ if (!ParseRegister(RegNo, StartLoc, EndLoc)) {
+ Operands.push_back(MSP430Operand::CreateReg(RegNo, StartLoc, EndLoc));
+ return false;
+ }
+ LLVM_FALLTHROUGH;
+ }
+ case AsmToken::Integer:
+ case AsmToken::Plus:
+ case AsmToken::Minus: {
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ const MCExpr *Val;
+ // Try constexpr[(rN)]
+ if (!getParser().parseExpression(Val)) {
+ unsigned RegNo = MSP430::PC;
+ SMLoc EndLoc = getParser().getTok().getLoc();
+ // Try (rN)
+ if (getLexer().getKind() == AsmToken::LParen) {
+ getLexer().Lex(); // Eat '('
+ SMLoc RegStartLoc;
+ if (ParseRegister(RegNo, RegStartLoc, EndLoc))
+ return true;
+ if (getLexer().getKind() != AsmToken::RParen)
+ return true;
+ EndLoc = getParser().getTok().getEndLoc();
+ getLexer().Lex(); // Eat ')'
+ }
+ Operands.push_back(MSP430Operand::CreateMem(RegNo, Val, StartLoc,
+ EndLoc));
+ return false;
+ }
+ return true;
+ }
+ case AsmToken::Amp: {
+ // Try &constexpr
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ getLexer().Lex(); // Eat '&'
+ const MCExpr *Val;
+ if (!getParser().parseExpression(Val)) {
+ SMLoc EndLoc = getParser().getTok().getLoc();
+ Operands.push_back(MSP430Operand::CreateMem(MSP430::SR, Val, StartLoc,
+ EndLoc));
+ return false;
+ }
+ return true;
+ }
+ case AsmToken::At: {
+ // Try @rN[+]
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ getLexer().Lex(); // Eat '@'
+ unsigned RegNo;
+ SMLoc RegStartLoc, EndLoc;
+ if (ParseRegister(RegNo, RegStartLoc, EndLoc))
+ return true;
+ if (getLexer().getKind() == AsmToken::Plus) {
+ Operands.push_back(MSP430Operand::CreatePostIndReg(RegNo, StartLoc, EndLoc));
+ getLexer().Lex(); // Eat '+'
+ return false;
+ }
+ if (Operands.size() > 1) // Emulate @rd in destination position as 0(rd)
+ Operands.push_back(MSP430Operand::CreateMem(RegNo,
+ MCConstantExpr::create(0, getContext()), StartLoc, EndLoc));
+ else
+ Operands.push_back(MSP430Operand::CreateIndReg(RegNo, StartLoc, EndLoc));
+ return false;
+ }
+ case AsmToken::Hash:
+ // Try #constexpr
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ getLexer().Lex(); // Eat '#'
+ const MCExpr *Val;
+ if (!getParser().parseExpression(Val)) {
+ SMLoc EndLoc = getParser().getTok().getLoc();
+ Operands.push_back(MSP430Operand::CreateImm(Val, StartLoc, EndLoc));
+ return false;
+ }
+ return true;
+ }
+}
+
+bool MSP430AsmParser::ParseLiteralValues(unsigned Size, SMLoc L) {
+ auto parseOne = [&]() -> bool {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitValue(Value, Size, L);
+ return false;
+ };
+ return (parseMany(parseOne));
+}
+
+extern "C" void LLVMInitializeMSP430AsmParser() {
+ RegisterMCAsmParser<MSP430AsmParser> X(getTheMSP430Target());
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MSP430GenAsmMatcher.inc"
+
+static unsigned convertGR16ToGR8(unsigned Reg) {
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown GR16 register");
+ case MSP430::PC: return MSP430::PCB;
+ case MSP430::SP: return MSP430::SPB;
+ case MSP430::SR: return MSP430::SRB;
+ case MSP430::CG: return MSP430::CGB;
+ case MSP430::FP: return MSP430::FPB;
+ case MSP430::R5: return MSP430::R5B;
+ case MSP430::R6: return MSP430::R6B;
+ case MSP430::R7: return MSP430::R7B;
+ case MSP430::R8: return MSP430::R8B;
+ case MSP430::R9: return MSP430::R9B;
+ case MSP430::R10: return MSP430::R10B;
+ case MSP430::R11: return MSP430::R11B;
+ case MSP430::R12: return MSP430::R12B;
+ case MSP430::R13: return MSP430::R13B;
+ case MSP430::R14: return MSP430::R14B;
+ case MSP430::R15: return MSP430::R15B;
+ }
+}
+
+unsigned MSP430AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ MSP430Operand &Op = static_cast<MSP430Operand &>(AsmOp);
+
+ if (!Op.isReg())
+ return Match_InvalidOperand;
+
+ unsigned Reg = Op.getReg();
+ bool isGR16 =
+ MSP430MCRegisterClasses[MSP430::GR16RegClassID].contains(Reg);
+
+ if (isGR16 && (Kind == MCK_GR8)) {
+ Op.setReg(convertGR16ToGR8(Reg));
+ return Match_Success;
+ }
+
+ return Match_InvalidOperand;
+}
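
As a side note on the operand class above: isCGImm accepts exactly the values the MSP430 constant generators (r2/r3) can supply without an extension word. A minimal standalone sketch of that check (the helper and test names here are illustrative, not taken from the patch):

#include <cassert>
#include <cstdint>

// Mirrors MSP430Operand::isCGImm above: true if the value can come from
// the constant generators instead of a #imm extension word.
static bool isConstantGeneratorImm(int64_t Val) {
  return Val == -1 || Val == 0 || Val == 1 || Val == 2 || Val == 4 || Val == 8;
}

int main() {
  assert(isConstantGeneratorImm(8));
  assert(!isConstantGeneratorImm(3)); // 3 needs an extension word
  return 0;
}
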
diff --git a/contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
new file mode 100644
index 000000000000..e5da130f9bbb
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -0,0 +1,387 @@
+//===-- MSP430Disassembler.cpp - Disassembler for MSP430 ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430Disassembler class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "msp430-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class MSP430Disassembler : public MCDisassembler {
+ DecodeStatus getInstructionI(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
+
+ DecodeStatus getInstructionII(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
+
+ DecodeStatus getInstructionCJ(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
+
+public:
+ MSP430Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+
+ DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createMSP430Disassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MSP430Disassembler(STI, Ctx);
+}
+
+extern "C" void LLVMInitializeMSP430Disassembler() {
+ TargetRegistry::RegisterMCDisassembler(getTheMSP430Target(),
+ createMSP430Disassembler);
+}
+
+static const unsigned GR8DecoderTable[] = {
+ MSP430::PCB, MSP430::SPB, MSP430::SRB, MSP430::CGB,
+ MSP430::FPB, MSP430::R5B, MSP430::R6B, MSP430::R7B,
+ MSP430::R8B, MSP430::R9B, MSP430::R10B, MSP430::R11B,
+ MSP430::R12B, MSP430::R13B, MSP430::R14B, MSP430::R15B
+};
+
+static DecodeStatus DecodeGR8RegisterClass(MCInst &MI, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = GR8DecoderTable[RegNo];
+ MI.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static const unsigned GR16DecoderTable[] = {
+ MSP430::PC, MSP430::SP, MSP430::SR, MSP430::CG,
+ MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
+ MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15
+};
+
+static DecodeStatus DecodeGR16RegisterClass(MCInst &MI, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = GR16DecoderTable[RegNo];
+ MI.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMemOperand(MCInst &MI, uint64_t Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+#include "MSP430GenDisassemblerTables.inc"
+
+static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address,
+ const void *Decoder) {
+ int64_t Imm;
+ switch (Bits) {
+ default:
+ llvm_unreachable("Invalid immediate value");
+ case 0x22: Imm = 4; break;
+ case 0x32: Imm = 8; break;
+ case 0x03: Imm = 0; break;
+ case 0x13: Imm = 1; break;
+ case 0x23: Imm = 2; break;
+ case 0x33: Imm = -1; break;
+ }
+ MI.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemOperand(MCInst &MI, uint64_t Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Reg = Bits & 15;
+ unsigned Imm = Bits >> 4;
+
+ if (DecodeGR16RegisterClass(MI, Reg, Address, Decoder) !=
+ MCDisassembler::Success)
+ return MCDisassembler::Fail;
+
+ MI.addOperand(MCOperand::createImm((int16_t)Imm));
+ return MCDisassembler::Success;
+}
+
+enum AddrMode {
+ amInvalid = 0,
+ amRegister,
+ amIndexed,
+ amIndirect,
+ amIndirectPost,
+ amSymbolic,
+ amImmediate,
+ amAbsolute,
+ amConstant
+};
+
+static AddrMode DecodeSrcAddrMode(unsigned Rs, unsigned As) {
+ switch (Rs) {
+ case 0:
+ if (As == 1) return amSymbolic;
+ if (As == 2) return amInvalid;
+ if (As == 3) return amImmediate;
+ break;
+ case 2:
+ if (As == 1) return amAbsolute;
+ if (As == 2) return amConstant;
+ if (As == 3) return amConstant;
+ break;
+ case 3:
+ return amConstant;
+ default:
+ break;
+ }
+ switch (As) {
+ case 0: return amRegister;
+ case 1: return amIndexed;
+ case 2: return amIndirect;
+ case 3: return amIndirectPost;
+ default:
+ llvm_unreachable("As out of range");
+ }
+}
+
+static AddrMode DecodeSrcAddrModeI(unsigned Insn) {
+ unsigned Rs = fieldFromInstruction(Insn, 8, 4);
+ unsigned As = fieldFromInstruction(Insn, 4, 2);
+ return DecodeSrcAddrMode(Rs, As);
+}
+
+static AddrMode DecodeSrcAddrModeII(unsigned Insn) {
+ unsigned Rs = fieldFromInstruction(Insn, 0, 4);
+ unsigned As = fieldFromInstruction(Insn, 4, 2);
+ return DecodeSrcAddrMode(Rs, As);
+}
+
+static AddrMode DecodeDstAddrMode(unsigned Insn) {
+ unsigned Rd = fieldFromInstruction(Insn, 0, 4);
+ unsigned Ad = fieldFromInstruction(Insn, 7, 1);
+ switch (Rd) {
+ case 0: return Ad ? amSymbolic : amRegister;
+ case 2: return Ad ? amAbsolute : amRegister;
+ default:
+ break;
+ }
+ return Ad ? amIndexed : amRegister;
+}
+
+static const uint8_t *getDecoderTable(AddrMode SrcAM, unsigned Words) {
+ assert(0 < Words && Words < 4 && "Incorrect number of words");
+ switch (SrcAM) {
+ default:
+ llvm_unreachable("Invalid addressing mode");
+ case amRegister:
+ assert(Words < 3 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableAlpha32 : DecoderTableAlpha16;
+ case amConstant:
+ assert(Words < 3 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableBeta32 : DecoderTableBeta16;
+ case amIndexed:
+ case amSymbolic:
+ case amImmediate:
+ case amAbsolute:
+ assert(Words > 1 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableGamma32 : DecoderTableGamma48;
+ case amIndirect:
+ case amIndirectPost:
+ assert(Words < 3 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableDelta32 : DecoderTableDelta16;
+ }
+}
+
+DecodeStatus MSP430Disassembler::getInstructionI(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ AddrMode SrcAM = DecodeSrcAddrModeI(Insn);
+ AddrMode DstAM = DecodeDstAddrMode(Insn);
+ if (SrcAM == amInvalid || DstAM == amInvalid) {
+    Size = 2; // skip one word and let the disassembler try further
+ return MCDisassembler::Fail;
+ }
+
+ unsigned Words = 1;
+ switch (SrcAM) {
+ case amIndexed:
+ case amSymbolic:
+ case amImmediate:
+ case amAbsolute:
+ if (Bytes.size() < (Words + 1) * 2) {
+ Size = 2;
+ return DecodeStatus::Fail;
+ }
+ Insn |= (uint64_t)support::endian::read16le(Bytes.data() + 2) << 16;
+ ++Words;
+ break;
+ default:
+ break;
+ }
+ switch (DstAM) {
+ case amIndexed:
+ case amSymbolic:
+ case amAbsolute:
+ if (Bytes.size() < (Words + 1) * 2) {
+ Size = 2;
+ return DecodeStatus::Fail;
+ }
+ Insn |= (uint64_t)support::endian::read16le(Bytes.data() + Words * 2)
+ << (Words * 16);
+ ++Words;
+ break;
+ default:
+ break;
+ }
+
+ DecodeStatus Result = decodeInstruction(getDecoderTable(SrcAM, Words), MI,
+ Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = Words * 2;
+ return Result;
+ }
+
+ Size = 2;
+ return DecodeStatus::Fail;
+}
+
+DecodeStatus MSP430Disassembler::getInstructionII(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ AddrMode SrcAM = DecodeSrcAddrModeII(Insn);
+ if (SrcAM == amInvalid) {
+    Size = 2; // skip one word and let the disassembler try further
+ return MCDisassembler::Fail;
+ }
+
+ unsigned Words = 1;
+ switch (SrcAM) {
+ case amIndexed:
+ case amSymbolic:
+ case amImmediate:
+ case amAbsolute:
+ if (Bytes.size() < (Words + 1) * 2) {
+ Size = 2;
+ return DecodeStatus::Fail;
+ }
+ Insn |= (uint64_t)support::endian::read16le(Bytes.data() + 2) << 16;
+ ++Words;
+ break;
+ default:
+ break;
+ }
+
+ const uint8_t *DecoderTable = Words == 2 ? DecoderTable32 : DecoderTable16;
+ DecodeStatus Result = decodeInstruction(DecoderTable, MI, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = Words * 2;
+ return Result;
+ }
+
+ Size = 2;
+ return DecodeStatus::Fail;
+}
+
+static MSP430CC::CondCodes getCondCode(unsigned Cond) {
+ switch (Cond) {
+ case 0: return MSP430CC::COND_NE;
+ case 1: return MSP430CC::COND_E;
+ case 2: return MSP430CC::COND_LO;
+ case 3: return MSP430CC::COND_HS;
+ case 4: return MSP430CC::COND_N;
+ case 5: return MSP430CC::COND_GE;
+ case 6: return MSP430CC::COND_L;
+ case 7: return MSP430CC::COND_NONE;
+ default:
+ llvm_unreachable("Cond out of range");
+ }
+}
+
+DecodeStatus MSP430Disassembler::getInstructionCJ(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ unsigned Cond = fieldFromInstruction(Insn, 10, 3);
+ unsigned Offset = fieldFromInstruction(Insn, 0, 10);
+
+ MI.addOperand(MCOperand::createImm(SignExtend32(Offset, 10)));
+
+ if (Cond == 7)
+ MI.setOpcode(MSP430::JMP);
+ else {
+ MI.setOpcode(MSP430::JCC);
+ MI.addOperand(MCOperand::createImm(getCondCode(Cond)));
+ }
+
+ Size = 2;
+ return DecodeStatus::Success;
+}
+
+DecodeStatus MSP430Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ unsigned Opc = fieldFromInstruction(Insn, 13, 3);
+ switch (Opc) {
+ case 0:
+ return getInstructionII(MI, Size, Bytes, Address, VStream, CStream);
+ case 1:
+ return getInstructionCJ(MI, Size, Bytes, Address, VStream, CStream);
+ default:
+ return getInstructionI(MI, Size, Bytes, Address, VStream, CStream);
+ }
+}
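
For the conditional-jump path above, a standalone sketch of how a format-III word splits into opcode, condition and 10-bit signed offset fields; the bit positions mirror the fieldFromInstruction calls in getInstructionCJ, and the helpers below are local stand-ins rather than LLVM APIs:

#include <cassert>
#include <cstdint>

// Local stand-ins for fieldFromInstruction / SignExtend32.
static unsigned field(uint16_t Insn, unsigned Lo, unsigned Width) {
  return (Insn >> Lo) & ((1u << Width) - 1);
}
static int32_t signExtend(uint32_t V, unsigned Bits) {
  uint32_t M = 1u << (Bits - 1);
  return (int32_t)(V ^ M) - (int32_t)M;
}

int main() {
  // 0x3ff6: opcode field (bits 13-15) == 1, condition == 7 (unconditional),
  // 10-bit offset 0x3f6 == -10 words.
  uint16_t Insn = 0x3ff6;
  assert(field(Insn, 13, 3) == 1);                 // routed to getInstructionCJ
  assert(field(Insn, 10, 3) == 7);                 // COND_NONE -> JMP
  assert(signExtend(field(Insn, 0, 10), 10) == -10);
  return 0;
}
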
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index be6d1a84a377..4d62547bc65b 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -16,28 +16,34 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-
// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
#include "MSP430GenAsmWriter.inc"
void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot, const MCSubtargetInfo &STI) {
- printInstruction(MI, O);
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
printAnnotation(O, Annot);
}
void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isImm())
- O << Op.getImm();
- else {
+ if (Op.isImm()) {
+ int64_t Imm = Op.getImm() * 2 + 2;
+ O << "$";
+ if (Imm >= 0)
+ O << '+';
+ O << Imm;
+ } else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
Op.getExpr()->print(O, &MAI);
}
@@ -72,7 +78,7 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
// vs
// mov.w glb(r1), r2
// Otherwise (!) msp430-as will silently miscompile the output :(
- if (!Base.getReg())
+ if (Base.getReg() == MSP430::SR)
O << '&';
if (Disp.isExpr())
@@ -83,10 +89,23 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
}
// Print register base field
- if (Base.getReg())
+ if ((Base.getReg() != MSP430::SR) &&
+ (Base.getReg() != MSP430::PC))
O << '(' << getRegisterName(Base.getReg()) << ')';
}
+void MSP430InstPrinter::printIndRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Base = MI->getOperand(OpNo);
+ O << "@" << getRegisterName(Base.getReg());
+}
+
+void MSP430InstPrinter::printPostIndRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Base = MI->getOperand(OpNo);
+ O << "@" << getRegisterName(Base.getReg()) << "+";
+}
+
void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned CC = MI->getOperand(OpNo).getImm();
@@ -112,5 +131,8 @@ void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
case MSP430CC::COND_L:
O << 'l';
break;
+ case MSP430CC::COND_N:
+ O << 'n';
+ break;
}
}
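
A quick worked example of the new printPCRelImmOperand arithmetic: the stored immediate counts words past the next instruction, so an encoded value of 3 prints as $+8 (3 * 2 + 2 bytes from the jump itself) and -1 prints as $+0; this is the inverse of the fixup_10_pcrel adjustment introduced in MSP430AsmBackend.cpp further down, which halves the byte displacement and subtracts one.
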
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 72afec18becb..cd02c4fa645a 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -28,13 +28,20 @@ namespace llvm {
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
+private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = nullptr);
void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = nullptr);
+ void printIndRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPostIndRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O);
void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
new file mode 100644
index 000000000000..bd69a9d8d795
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -0,0 +1,178 @@
+//===-- MSP430AsmBackend.cpp - MSP430 Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MSP430FixupKinds.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class MSP430AsmBackend : public MCAsmBackend {
+ uint8_t OSABI;
+
+ uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ MCContext &Ctx) const;
+
+public:
+ MSP430AsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI)
+ : MCAsmBackend(support::little), OSABI(OSABI) {}
+ ~MSP430AsmBackend() override {}
+
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override;
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createMSP430ELFObjectWriter(OSABI);
+ }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout,
+ const bool WasForced) const override {
+ return false;
+ }
+
+ unsigned getNumFixupKinds() const override {
+ return MSP430::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo Infos[MSP430::NumTargetFixupKinds] = {
+      // This table must be in the same order as the enum in MSP430FixupKinds.h.
+ //
+ // name offset bits flags
+ {"fixup_32", 0, 32, 0},
+ {"fixup_10_pcrel", 0, 10, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_16", 0, 16, 0},
+ {"fixup_16_pcrel", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_16_byte", 0, 16, 0},
+ {"fixup_16_pcrel_byte", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_2x_pcrel", 0, 10, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_rl_pcrel", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_8", 0, 8, 0},
+ {"fixup_sym_diff", 0, 32, 0},
+ };
+ static_assert((array_lengthof(Infos)) == MSP430::NumTargetFixupKinds,
+ "Not all fixup kinds added to Infos array");
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {
+ return false;
+ }
+
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override {}
+
+ bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+};
+
+uint64_t MSP430AsmBackend::adjustFixupValue(const MCFixup &Fixup,
+ uint64_t Value,
+ MCContext &Ctx) const {
+ unsigned Kind = Fixup.getKind();
+ switch (Kind) {
+ case MSP430::fixup_10_pcrel: {
+ if (Value & 0x1)
+ Ctx.reportError(Fixup.getLoc(), "fixup value must be 2-byte aligned");
+
+ // Offset is signed
+ int16_t Offset = Value;
+ // Jumps are in words
+ Offset >>= 1;
+ // PC points to the next instruction so decrement by one
+ --Offset;
+
+ if (Offset < -512 || Offset > 511)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+
+ // Mask 10 bits
+ Offset &= 0x3ff;
+
+ return Offset;
+ }
+ default:
+ return Value;
+ }
+}
+
+void MSP430AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const {
+ Value = adjustFixupValue(Fixup, Value, Asm.getContext());
+ MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+ if (!Value)
+ return; // Doesn't change encoding.
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned Offset = Fixup.getOffset();
+ unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
+
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
+}
+
+bool MSP430AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+ if ((Count % 2) != 0)
+ return false;
+
+ // The canonical nop on MSP430 is mov #0, r3
+ uint64_t NopCount = Count / 2;
+ while (NopCount--)
+ OS.write("\x03\x43", 2);
+
+ return true;
+}
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createMSP430MCAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options) {
+ return new MSP430AsmBackend(STI, ELF::ELFOSABI_STANDALONE);
+}
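
A standalone sketch of the fixup_10_pcrel case in adjustFixupValue above, assuming Value arrives as the byte displacement from the jump: halve to words, subtract one for the already-advanced PC, range-check, and keep the low 10 bits (the names below are illustrative):

#include <cassert>
#include <cstdint>
#include <optional>

// Mirrors the fixup_10_pcrel adjustment; returns nullopt where the backend
// would report an error.
static std::optional<uint16_t> encodeJumpOffset(int64_t ByteDisp) {
  if (ByteDisp & 0x1)
    return std::nullopt;            // must be 2-byte aligned
  int16_t Words = ByteDisp >> 1;    // jumps count words
  --Words;                          // PC already points past the jump
  if (Words < -512 || Words > 511)
    return std::nullopt;            // outside the signed 10-bit range
  return uint16_t(Words & 0x3ff);
}

int main() {
  assert(encodeJumpOffset(8) == uint16_t(3));      // $+8 -> offset field 3
  assert(encodeJumpOffset(0) == uint16_t(0x3ff));  // $+0 -> -1 -> 0x3ff
  assert(!encodeJumpOffset(2048));                 // too far for a jump
  return 0;
}
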
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
new file mode 100644
index 000000000000..e47db2400a05
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
@@ -0,0 +1,59 @@
+//===-- MSP430ELFObjectWriter.cpp - MSP430 ELF Writer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MSP430FixupKinds.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class MSP430ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ MSP430ELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(false, OSABI, ELF::EM_MSP430,
+ /*HasRelocationAddend*/ true) {}
+
+ ~MSP430ELFObjectWriter() override {}
+
+protected:
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override {
+ // Translate fixup kind to ELF relocation type.
+ switch ((unsigned)Fixup.getKind()) {
+ case FK_Data_1: return ELF::R_MSP430_8;
+ case FK_Data_2: return ELF::R_MSP430_16_BYTE;
+ case FK_Data_4: return ELF::R_MSP430_32;
+ case MSP430::fixup_32: return ELF::R_MSP430_32;
+ case MSP430::fixup_10_pcrel: return ELF::R_MSP430_10_PCREL;
+ case MSP430::fixup_16: return ELF::R_MSP430_16;
+ case MSP430::fixup_16_pcrel: return ELF::R_MSP430_16_PCREL;
+ case MSP430::fixup_16_byte: return ELF::R_MSP430_16_BYTE;
+ case MSP430::fixup_16_pcrel_byte: return ELF::R_MSP430_16_PCREL_BYTE;
+ case MSP430::fixup_2x_pcrel: return ELF::R_MSP430_2X_PCREL;
+ case MSP430::fixup_rl_pcrel: return ELF::R_MSP430_RL_PCREL;
+ case MSP430::fixup_8: return ELF::R_MSP430_8;
+ case MSP430::fixup_sym_diff: return ELF::R_MSP430_SYM_DIFF;
+ default:
+ llvm_unreachable("Invalid fixup kind");
+ }
+ }
+};
+} // end of anonymous namespace
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createMSP430ELFObjectWriter(uint8_t OSABI) {
+ return llvm::make_unique<MSP430ELFObjectWriter>(OSABI);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
new file mode 100644
index 000000000000..9449cb278024
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
@@ -0,0 +1,81 @@
+//===-- MSP430ELFStreamer.cpp - MSP430 ELF Target Streamer Methods --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+class MSP430TargetELFStreamer : public MCTargetStreamer {
+public:
+ MCELFStreamer &getStreamer();
+ MSP430TargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+};
+
+// This part is for ELF object output.
+MSP430TargetELFStreamer::MSP430TargetELFStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI)
+ : MCTargetStreamer(S) {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ unsigned EFlags = MCA.getELFHeaderEFlags();
+ MCA.setELFHeaderEFlags(EFlags);
+
+ // Emit build attributes section according to
+ // MSP430 EABI (slaa534.pdf, part 13).
+ MCSection *AttributeSection = getStreamer().getContext().getELFSection(
+ ".MSP430.attributes", ELF::SHT_MSP430_ATTRIBUTES, 0);
+ Streamer.SwitchSection(AttributeSection);
+
+ // Format version.
+ Streamer.EmitIntValue(0x41, 1);
+ // Subsection length.
+ Streamer.EmitIntValue(22, 4);
+ // Vendor name string, zero-terminated.
+ Streamer.EmitBytes("mspabi");
+ Streamer.EmitIntValue(0, 1);
+
+ // Attribute vector scope tag. 1 stands for the entire file.
+ Streamer.EmitIntValue(1, 1);
+ // Attribute vector length.
+ Streamer.EmitIntValue(11, 4);
+ // OFBA_MSPABI_Tag_ISA(4) = 1, MSP430
+ Streamer.EmitIntValue(4, 1);
+ Streamer.EmitIntValue(1, 1);
+ // OFBA_MSPABI_Tag_Code_Model(6) = 1, Small
+ Streamer.EmitIntValue(6, 1);
+ Streamer.EmitIntValue(1, 1);
+ // OFBA_MSPABI_Tag_Data_Model(8) = 1, Small
+ Streamer.EmitIntValue(8, 1);
+ Streamer.EmitIntValue(1, 1);
+}
+
+MCELFStreamer &MSP430TargetELFStreamer::getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+}
+
+MCTargetStreamer *
+createMSP430ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new MSP430TargetELFStreamer(S, STI);
+ return nullptr;
+}
+
+} // namespace llvm
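
The two length fields emitted above appear to account for every byte that follows them: the attribute vector length is 11 = 1 (scope tag) + 4 (the length field itself) + 3 * 2 (tag/value byte pairs), and the subsection length is 22 = 4 (its own field) + 7 (the zero-terminated "mspabi" vendor string) + 11 (the attribute vector).
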
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
new file mode 100644
index 000000000000..1eb6a2759423
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
@@ -0,0 +1,53 @@
+//===-- MSP430FixupKinds.h - MSP430 Specific Fixup Entries ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430FIXUPKINDS_H
+#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+#undef MSP430
+
+namespace llvm {
+namespace MSP430 {
+
+// This table must be in the same order as
+// MCFixupKindInfo Infos[MSP430::NumTargetFixupKinds]
+// in MSP430AsmBackend.cpp.
+//
+enum Fixups {
+ // A 32 bit absolute fixup.
+ fixup_32 = FirstTargetFixupKind,
+ // A 10 bit PC relative fixup.
+ fixup_10_pcrel,
+ // A 16 bit absolute fixup.
+ fixup_16,
+ // A 16 bit PC relative fixup.
+ fixup_16_pcrel,
+ // A 16 bit absolute fixup for byte operations.
+ fixup_16_byte,
+ // A 16 bit PC relative fixup for command address.
+ fixup_16_pcrel_byte,
+ // A 10 bit PC relative fixup for complicated polymorphs.
+ fixup_2x_pcrel,
+ // A 16 bit relaxable fixup.
+ fixup_rl_pcrel,
+  // An 8 bit absolute fixup.
+ fixup_8,
+ // A 32 bit symbol difference fixup.
+ fixup_sym_diff,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // end namespace MSP430
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index 82e6731ecd78..36e9a9c31075 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -20,6 +20,7 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) {
CodePointerSize = CalleeSaveStackSlotSize = 2;
CommentString = ";";
+ SeparatorString = "{";
AlignmentIsInBytes = false;
UsesELFSectionDirectiveForBSS = true;
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
new file mode 100644
index 000000000000..06f9f307cb1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
@@ -0,0 +1,211 @@
+//===-- MSP430MCCodeEmitter.cpp - Convert MSP430 code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "MCTargetDesc/MSP430FixupKinds.h"
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace llvm {
+
+class MSP430MCCodeEmitter : public MCCodeEmitter {
+ MCContext &Ctx;
+ MCInstrInfo const &MCII;
+
+ // Offset keeps track of current word number being emitted
+ // inside a particular instruction.
+ mutable unsigned Offset;
+
+ /// TableGen'erated function for getting the binary encoding for an
+ /// instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Returns the binary encoding of operands.
+ ///
+ /// If an operand requires relocation, the relocation is recorded
+ /// and zero is returned.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getMemOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getPCRelImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getCGImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getCCOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+public:
+ MSP430MCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
+ : Ctx(ctx), MCII(MCII) {}
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+};
+
+void MSP430MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ // Get byte count of instruction.
+ unsigned Size = Desc.getSize();
+
+ // Initialize fixup offset
+ Offset = 2;
+
+ uint64_t BinaryOpCode = getBinaryCodeForInstr(MI, Fixups, STI);
+ size_t WordCount = Size / 2;
+
+ while (WordCount--) {
+ support::endian::write(OS, (uint16_t)BinaryOpCode, support::little);
+ BinaryOpCode >>= 16;
+ }
+}
+
+unsigned MSP430MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ if (MO.isImm()) {
+ Offset += 2;
+ return MO.getImm();
+ }
+
+ assert(MO.isExpr() && "Expected expr operand");
+ Fixups.push_back(MCFixup::create(Offset, MO.getExpr(),
+ static_cast<MCFixupKind>(MSP430::fixup_16_byte), MI.getLoc()));
+ Offset += 2;
+ return 0;
+}
+
+unsigned MSP430MCCodeEmitter::getMemOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO1 = MI.getOperand(Op);
+ assert(MO1.isReg() && "Register operand expected");
+ unsigned Reg = Ctx.getRegisterInfo()->getEncodingValue(MO1.getReg());
+
+ const MCOperand &MO2 = MI.getOperand(Op + 1);
+ if (MO2.isImm()) {
+ Offset += 2;
+ return ((unsigned)MO2.getImm() << 4) | Reg;
+ }
+
+ assert(MO2.isExpr() && "Expr operand expected");
+ MSP430::Fixups FixupKind;
+ switch (Reg) {
+ case 0:
+ FixupKind = MSP430::fixup_16_pcrel_byte;
+ break;
+ case 2:
+ FixupKind = MSP430::fixup_16_byte;
+ break;
+ default:
+ FixupKind = MSP430::fixup_16_byte;
+ break;
+ }
+ Fixups.push_back(MCFixup::create(Offset, MO2.getExpr(),
+ static_cast<MCFixupKind>(FixupKind), MI.getLoc()));
+ Offset += 2;
+ return Reg;
+}
+
+unsigned MSP430MCCodeEmitter::getPCRelImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ if (MO.isImm())
+ return MO.getImm();
+
+ assert(MO.isExpr() && "Expr operand expected");
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ static_cast<MCFixupKind>(MSP430::fixup_10_pcrel), MI.getLoc()));
+ return 0;
+}
+
+unsigned MSP430MCCodeEmitter::getCGImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Op);
+  assert(MO.isImm() && "Immediate operand expected");
+
+ int64_t Imm = MO.getImm();
+ switch (Imm) {
+ default:
+ llvm_unreachable("Invalid immediate value");
+ case 4: return 0x22;
+ case 8: return 0x32;
+ case 0: return 0x03;
+ case 1: return 0x13;
+ case 2: return 0x23;
+ case -1: return 0x33;
+ }
+}
+
+unsigned MSP430MCCodeEmitter::getCCOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ assert(MO.isImm() && "Immediate operand expected");
+ switch (MO.getImm()) {
+ case MSP430CC::COND_NE: return 0;
+ case MSP430CC::COND_E: return 1;
+ case MSP430CC::COND_LO: return 2;
+ case MSP430CC::COND_HS: return 3;
+ case MSP430CC::COND_N: return 4;
+ case MSP430CC::COND_GE: return 5;
+ case MSP430CC::COND_L: return 6;
+ default:
+ llvm_unreachable("Unknown condition code");
+ }
+}
+
+MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new MSP430MCCodeEmitter(Ctx, MCII);
+}
+
+#include "MSP430GenMCCodeEmitter.inc"
+
+} // end of namespace llvm
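
The constant-generator table in getCGImmOpValue above is the inverse of DecodeCGImm in the disassembler: the low nibble selects r2 or r3 and the upper bits carry the As field, so each special immediate has a fixed 6-bit pattern. A standalone round-trip sketch of the two tables (names are illustrative):

#include <cassert>
#include <cstdint>

// Encoder table as in getCGImmOpValue; decoder table as in DecodeCGImm.
static unsigned encodeCGImm(int64_t Imm) {
  switch (Imm) {
  case 4:  return 0x22;
  case 8:  return 0x32;
  case 0:  return 0x03;
  case 1:  return 0x13;
  case 2:  return 0x23;
  case -1: return 0x33;
  default: return ~0u; // not a constant-generator value
  }
}
static int64_t decodeCGImm(unsigned Bits) {
  switch (Bits) {
  case 0x22: return 4;
  case 0x32: return 8;
  case 0x03: return 0;
  case 0x13: return 1;
  case 0x23: return 2;
  case 0x33: return -1;
  default:   return INT64_MIN;
  }
}

int main() {
  const int64_t Vals[] = {-1, 0, 1, 2, 4, 8};
  for (int64_t V : Vals)
    assert(decodeCGImm(encodeCGImm(V)) == V); // the tables are inverses
  return 0;
}
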
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 8c715500f38b..b21145d3904a 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -58,22 +58,15 @@ static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T,
}
extern "C" void LLVMInitializeMSP430TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<MSP430MCAsmInfo> X(getTheMSP430Target());
+ Target &T = getTheMSP430Target();
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(getTheMSP430Target(),
- createMSP430MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(getTheMSP430Target(),
- createMSP430MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(getTheMSP430Target(),
- createMSP430MCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(getTheMSP430Target(),
- createMSP430MCInstPrinter);
+ RegisterMCAsmInfo<MSP430MCAsmInfo> X(T);
+ TargetRegistry::RegisterMCInstrInfo(T, createMSP430MCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(T, createMSP430MCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(T, createMSP430MCSubtargetInfo);
+ TargetRegistry::RegisterMCInstPrinter(T, createMSP430MCInstPrinter);
+ TargetRegistry::RegisterMCCodeEmitter(T, createMSP430MCCodeEmitter);
+ TargetRegistry::RegisterMCAsmBackend(T, createMSP430MCAsmBackend);
+ TargetRegistry::RegisterObjectTargetStreamer(
+ T, createMSP430ObjectTargetStreamer);
}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index b901c5f09794..e484c79c9ee9 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -15,12 +15,39 @@
#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
+#include <memory>
namespace llvm {
class Target;
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCInstrInfo;
+class MCSubtargetInfo;
+class MCRegisterInfo;
+class MCContext;
+class MCTargetOptions;
+class MCObjectTargetWriter;
+class MCStreamer;
+class MCTargetStreamer;
Target &getTheMSP430Target();
+/// Creates a machine code emitter for MSP430.
+MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMSP430MCAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options);
+
+MCTargetStreamer *
+createMSP430ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+
+std::unique_ptr<MCObjectTargetWriter>
+createMSP430ELFObjectWriter(uint8_t OSABI);
+
} // End llvm namespace
// Defines symbolic names for MSP430 registers.
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.h b/contrib/llvm/lib/Target/MSP430/MSP430.h
index 796f25233123..7a5314a10844 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.h
@@ -27,6 +27,8 @@ namespace MSP430CC {
COND_LO = 3, // aka COND_NC
COND_GE = 4,
COND_L = 5,
+ COND_N = 6, // jump if negative
+ COND_NONE, // unconditional
COND_INVALID = -1
};
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.td b/contrib/llvm/lib/Target/MSP430/MSP430.td
index 203864dd4065..8fa99dc13dd5 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.td
@@ -64,11 +64,29 @@ include "MSP430InstrInfo.td"
def MSP430InstrInfo : InstrInfo;
+//===---------------------------------------------------------------------===//
+// Assembly Printers
+//===---------------------------------------------------------------------===//
+
+def MSP430AsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+}
+
+//===---------------------------------------------------------------------===//
+// Assembly Parsers
+//===---------------------------------------------------------------------===//
+
+def MSP430AsmParser : AsmParser {
+ let AllowDuplicateRegisterNames = 1;
+ let ShouldEmitMatchRegisterAltName = 1;
+}
+
//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//
def MSP430 : Target {
let InstructionSet = MSP430InstrInfo;
+ let AssemblyParsers = [MSP430AsmParser];
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 005f5f44a635..7a1998ad355d 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -98,6 +98,7 @@ namespace {
MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel) {}
+ private:
StringRef getPassName() const override {
return "MSP430 DAG->DAG Pattern Instruction Selection";
}
@@ -112,8 +113,9 @@ namespace {
// Include the pieces autogenerated from the target description.
#include "MSP430GenDAGISel.inc"
- private:
+ // Main method to transform nodes into machine nodes.
void Select(SDNode *N) override;
+
bool tryIndexedLoad(SDNode *Op);
bool tryIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8,
unsigned Opc16);
@@ -250,11 +252,9 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
if (MatchAddress(N, AM))
return false;
- EVT VT = N.getValueType();
- if (AM.BaseType == MSP430ISelAddressMode::RegBase) {
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase)
if (!AM.Base.Reg.getNode())
- AM.Base.Reg = CurDAG->getRegister(0, VT);
- }
+ AM.Base.Reg = CurDAG->getRegister(MSP430::SR, MVT::i16);
Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase)
? CurDAG->getTargetFrameIndex(
@@ -336,10 +336,10 @@ bool MSP430DAGToDAGISel::tryIndexedLoad(SDNode *N) {
unsigned Opcode = 0;
switch (VT.SimpleTy) {
case MVT::i8:
- Opcode = MSP430::MOV8rm_POST;
+ Opcode = MSP430::MOV8rp;
break;
case MVT::i16:
- Opcode = MSP430::MOV16rm_POST;
+ Opcode = MSP430::MOV16rp;
break;
default:
return false;
@@ -362,12 +362,11 @@ bool MSP430DAGToDAGISel::tryIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
MVT VT = LD->getMemoryVT().getSimpleVT();
unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
+ MachineMemOperand *MemRef = cast<MemSDNode>(N1)->getMemOperand();
SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
SDNode *ResNode =
CurDAG->SelectNodeTo(Op, Opc, VT, MVT::i16, MVT::Other, Ops0);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemRef});
// Transfer chain.
ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2));
// Transfer writeback.
@@ -413,47 +412,47 @@ void MSP430DAGToDAGISel::Select(SDNode *Node) {
break;
case ISD::ADD:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ MSP430::ADD8rp, MSP430::ADD16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ MSP430::ADD8rp, MSP430::ADD16rp))
return;
// Other cases are autogenerated.
break;
case ISD::SUB:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::SUB8rm_POST, MSP430::SUB16rm_POST))
+ MSP430::SUB8rp, MSP430::SUB16rp))
return;
// Other cases are autogenerated.
break;
case ISD::AND:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ MSP430::AND8rp, MSP430::AND16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ MSP430::AND8rp, MSP430::AND16rp))
return;
// Other cases are autogenerated.
break;
case ISD::OR:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ MSP430::BIS8rp, MSP430::BIS16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ MSP430::BIS8rp, MSP430::BIS16rp))
return;
// Other cases are autogenerated.
break;
case ISD::XOR:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ MSP430::XOR8rp, MSP430::XOR16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ MSP430::XOR8rp, MSP430::XOR16rp))
return;
// Other cases are autogenerated.
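
Editorial aside: the hunks above retarget tryIndexedLoad/tryIndexedBinOp from the old *_POST pseudo opcodes to the real post-increment (@Rs+) instructions (MOV16rp, ADD16rp, BIS16rp, ...). As a minimal sketch, not part of this patch, the C loop below shows the kind of indexed-load-plus-op pattern these selectors aim at; the function name and the claim that ADD16rp would be chosen are illustrative assumptions.

/* Minimal sketch (assumption, not from the patch): an indexed load feeding
 * an add, the shape tryIndexedBinOp matches and could lower to
 * "add @Rs+, Rd" (ADD16rp), which reads through the register and then
 * bumps it by the access size. */
#include <stddef.h>
#include <stdint.h>

uint16_t sum_words(const uint16_t *p, size_t n) {
    uint16_t acc = 0;
    while (n--)
        acc += *p++;   /* load + post-increment + add in one candidate instruction */
    return acc;
}
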
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index f5b2bda5d1e4..3e706134afc5 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -95,6 +95,8 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::CTTZ, MVT::i8, Expand);
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
@@ -217,8 +219,6 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
// { RTLIB::NEG_F64, "__mspabi_negd", ISD::SETCC_INVALID },
// { RTLIB::NEG_F32, "__mspabi_negf", ISD::SETCC_INVALID },
- // TODO: SLL/SRA/SRL are in libgcc, RLL isn't
-
// Universal Integer Operations - EABI Table 9
{ RTLIB::SDIV_I16, "__mspabi_divi", ISD::SETCC_INVALID },
{ RTLIB::SDIV_I32, "__mspabi_divli", ISD::SETCC_INVALID },
@@ -233,6 +233,13 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
{ RTLIB::UREM_I32, "__mspabi_remul", ISD::SETCC_INVALID },
{ RTLIB::UREM_I64, "__mspabi_remull", ISD::SETCC_INVALID },
+ // Bitwise Operations - EABI Table 10
+ // TODO: __mspabi_[srli/srai/slli] ARE implemented in libgcc
+ { RTLIB::SRL_I32, "__mspabi_srll", ISD::SETCC_INVALID },
+ { RTLIB::SRA_I32, "__mspabi_sral", ISD::SETCC_INVALID },
+ { RTLIB::SHL_I32, "__mspabi_slll", ISD::SETCC_INVALID },
+ // __mspabi_[srlll/srall/sllll/rlli/rlll] are NOT implemented in libgcc
+
};
for (const auto &LC : LibraryCalls) {
@@ -940,30 +947,40 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
// Expand non-constant shifts to loops:
if (!isa<ConstantSDNode>(N->getOperand(1)))
- switch (Opc) {
- default: llvm_unreachable("Invalid shift opcode!");
- case ISD::SHL:
- return DAG.getNode(MSP430ISD::SHL, dl,
- VT, N->getOperand(0), N->getOperand(1));
- case ISD::SRA:
- return DAG.getNode(MSP430ISD::SRA, dl,
- VT, N->getOperand(0), N->getOperand(1));
- case ISD::SRL:
- return DAG.getNode(MSP430ISD::SRL, dl,
- VT, N->getOperand(0), N->getOperand(1));
- }
+ return Op;
uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
// Expand the stuff into sequence of shifts.
- // FIXME: for some shift amounts this might be done better!
- // E.g.: foo >> (8 + N) => sxt(swpb(foo)) >> N
SDValue Victim = N->getOperand(0);
+ if (ShiftAmount >= 8) {
+ assert(VT == MVT::i16 && "Can not shift i8 by 8 and more");
+ switch(Opc) {
+ default:
+ llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // foo << (8 + N) => swpb(zext(foo)) << N
+ Victim = DAG.getZeroExtendInReg(Victim, dl, MVT::i8);
+ Victim = DAG.getNode(ISD::BSWAP, dl, VT, Victim);
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ // foo >> (8 + N) => sxt(swpb(foo)) >> N
+ Victim = DAG.getNode(ISD::BSWAP, dl, VT, Victim);
+ Victim = (Opc == ISD::SRA)
+ ? DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Victim,
+ DAG.getValueType(MVT::i8))
+ : DAG.getZeroExtendInReg(Victim, dl, MVT::i8);
+ break;
+ }
+ ShiftAmount -= 8;
+ }
+
if (Opc == ISD::SRL && ShiftAmount) {
// Emit a special goodness here:
// srl A, 1 => clrc; rrc A
- Victim = DAG.getNode(MSP430ISD::RRC, dl, VT, Victim);
+ Victim = DAG.getNode(MSP430ISD::RRCL, dl, VT, Victim);
ShiftAmount -= 1;
}
@@ -1342,15 +1359,14 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
case MSP430ISD::RRA: return "MSP430ISD::RRA";
case MSP430ISD::RLA: return "MSP430ISD::RLA";
case MSP430ISD::RRC: return "MSP430ISD::RRC";
+ case MSP430ISD::RRCL: return "MSP430ISD::RRCL";
case MSP430ISD::CALL: return "MSP430ISD::CALL";
case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper";
case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
case MSP430ISD::CMP: return "MSP430ISD::CMP";
case MSP430ISD::SETCC: return "MSP430ISD::SETCC";
case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
- case MSP430ISD::SHL: return "MSP430ISD::SHL";
- case MSP430ISD::SRA: return "MSP430ISD::SRA";
- case MSP430ISD::SRL: return "MSP430ISD::SRL";
+ case MSP430ISD::DADD: return "MSP430ISD::DADD";
}
return nullptr;
}
@@ -1397,33 +1413,49 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI,
const TargetInstrInfo &TII = *F->getSubtarget().getInstrInfo();
unsigned Opc;
+ bool ClearCarry = false;
const TargetRegisterClass * RC;
switch (MI.getOpcode()) {
default: llvm_unreachable("Invalid shift opcode!");
case MSP430::Shl8:
- Opc = MSP430::SHL8r1;
- RC = &MSP430::GR8RegClass;
- break;
+ Opc = MSP430::ADD8rr;
+ RC = &MSP430::GR8RegClass;
+ break;
case MSP430::Shl16:
- Opc = MSP430::SHL16r1;
- RC = &MSP430::GR16RegClass;
- break;
+ Opc = MSP430::ADD16rr;
+ RC = &MSP430::GR16RegClass;
+ break;
case MSP430::Sra8:
- Opc = MSP430::SAR8r1;
- RC = &MSP430::GR8RegClass;
- break;
+ Opc = MSP430::RRA8r;
+ RC = &MSP430::GR8RegClass;
+ break;
case MSP430::Sra16:
- Opc = MSP430::SAR16r1;
- RC = &MSP430::GR16RegClass;
- break;
+ Opc = MSP430::RRA16r;
+ RC = &MSP430::GR16RegClass;
+ break;
case MSP430::Srl8:
- Opc = MSP430::SAR8r1c;
- RC = &MSP430::GR8RegClass;
- break;
+ ClearCarry = true;
+ Opc = MSP430::RRC8r;
+ RC = &MSP430::GR8RegClass;
+ break;
case MSP430::Srl16:
- Opc = MSP430::SAR16r1c;
- RC = &MSP430::GR16RegClass;
- break;
+ ClearCarry = true;
+ Opc = MSP430::RRC16r;
+ RC = &MSP430::GR16RegClass;
+ break;
+ case MSP430::Rrcl8:
+ case MSP430::Rrcl16: {
+ BuildMI(*BB, MI, dl, TII.get(MSP430::BIC16rc), MSP430::SR)
+ .addReg(MSP430::SR).addImm(1);
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned RrcOpc = MI.getOpcode() == MSP430::Rrcl16
+ ? MSP430::RRC16r : MSP430::RRC8r;
+ BuildMI(*BB, MI, dl, TII.get(RrcOpc), DstReg)
+ .addReg(SrcReg);
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
}
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -1476,8 +1508,16 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI,
BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
.addReg(ShiftAmtSrcReg).addMBB(BB)
.addReg(ShiftAmtReg2).addMBB(LoopBB);
- BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
- .addReg(ShiftReg);
+ if (ClearCarry)
+ BuildMI(LoopBB, dl, TII.get(MSP430::BIC16rc), MSP430::SR)
+ .addReg(MSP430::SR).addImm(1);
+ if (Opc == MSP430::ADD8rr || Opc == MSP430::ADD16rr)
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg)
+ .addReg(ShiftReg);
+ else
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg);
BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
.addReg(ShiftAmtReg).addImm(1);
BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
@@ -1499,9 +1539,10 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
unsigned Opc = MI.getOpcode();
- if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
- Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
- Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
+ if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
+ Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
+ Opc == MSP430::Srl8 || Opc == MSP430::Srl16 ||
+ Opc == MSP430::Rrcl8 || Opc == MSP430::Rrcl16)
return EmitShiftInstr(MI, BB);
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
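
Editorial aside: the LowerShifts hunk above peels 8 bits off constant shifts of an i16 using swpb plus a zero- or sign-extension, per the comment "foo >> (8 + N) => sxt(swpb(foo)) >> N". The following self-contained C check of that identity is an illustration only, not code from the patch, and it assumes the host compiler implements >> on signed values as an arithmetic shift.

/* Sketch verifying the identity used in LowerShifts above:
 *   foo >> (8 + N)  ==  sxt(swpb(foo)) >> N     for N in [0, 7]
 * Assumes arithmetic right shift on signed integers (typical compilers). */
#include <assert.h>
#include <stdint.h>

static int16_t sra_via_swpb(int16_t foo, unsigned n) {    /* n in [0, 7] */
    uint16_t u       = (uint16_t)foo;
    uint16_t swapped = (uint16_t)((u >> 8) | (u << 8));    /* swpb */
    int16_t  sext    = (int8_t)(swapped & 0xff);           /* sxt  */
    return (int16_t)(sext >> n);
}

int main(void) {
    for (int32_t v = -32768; v <= 32767; ++v)
        for (unsigned n = 0; n < 8; ++n)
            assert(sra_via_swpb((int16_t)v, n) == (int16_t)((int16_t)v >> (8 + n)));
    return 0;
}
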
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
index 842d03df32fc..731bc1406711 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -36,6 +36,9 @@ namespace llvm {
/// Y = RRC X, rotate right via carry
RRC,
+ /// Rotate right via carry, carry gets cleared beforehand by clrc
+ RRCL,
+
/// CALL - These operations represent an abstract call
/// instruction, which includes a bunch of information.
CALL,
@@ -61,8 +64,9 @@ namespace llvm {
/// is condition code and operand 4 is flag operand.
SELECT_CC,
- /// SHL, SRA, SRL - Non-constant shifts.
- SHL, SRA, SRL
+ /// DADD - Decimal addition with carry
+ /// TODO Nothing generates a node of this type yet.
+ DADD,
};
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
index a9e87dad0cd8..e2e4503db20c 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
@@ -11,201 +11,431 @@
// Describe MSP430 instructions format here
//
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-class Format<bits<2> val> {
- bits<2> Value = val;
-}
-
-def PseudoFrm : Format<0>;
-def SingleOpFrm : Format<1>;
-def DoubleOpFrm : Format<2>;
-def CondJumpFrm : Format<3>;
-
class SourceMode<bits<2> val> {
bits<2> Value = val;
}
-def SrcReg : SourceMode<0>;
-def SrcMem : SourceMode<1>;
-def SrcIndReg : SourceMode<2>;
-def SrcPostInc : SourceMode<3>;
-def SrcImm : SourceMode<3>;
+def SrcReg : SourceMode<0>; // r
+def SrcMem : SourceMode<1>; // m
+def SrcIndReg : SourceMode<2>; // n
+def SrcPostInc : SourceMode<3>; // p
+def SrcImm : SourceMode<3>; // i
+// SrcCGImm : SourceMode< >; // c
class DestMode<bit val> {
bit Value = val;
}
-def DstReg : DestMode<0>;
-def DstMem : DestMode<1>;
-
-class SizeVal<bits<3> val> {
- bits<3> Value = val;
-}
-
-def SizeUnknown : SizeVal<0>; // Unknown / unset size
-def SizeSpecial : SizeVal<1>; // Special instruction, e.g. pseudo
-def Size2Bytes : SizeVal<2>;
-def Size4Bytes : SizeVal<3>;
-def Size6Bytes : SizeVal<4>;
+def DstReg : DestMode<0>; // r
+def DstMem : DestMode<1>; // m
// Generic MSP430 Format
-class MSP430Inst<dag outs, dag ins, SizeVal sz, Format f,
- string asmstr> : Instruction {
- field bits<16> Inst;
+class MSP430Inst<dag outs, dag ins, int size, string asmstr> : Instruction {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
let Namespace = "MSP430";
dag OutOperandList = outs;
dag InOperandList = ins;
- Format Form = f;
- SizeVal Sz = sz;
-
- // Define how we want to layout our TargetSpecific information field... This
- // should be kept up-to-date with the fields in the MSP430InstrInfo.h file.
- let TSFlags{1-0} = Form.Value;
- let TSFlags{4-2} = Sz.Value;
-
- let AsmString = asmstr;
+ let AsmString = asmstr;
+ let Size = size;
}
-// FIXME: Create different classes for different addressing modes.
-
// MSP430 Double Operand (Format I) Instructions
-class IForm<bits<4> opcode, DestMode dest, bit bw, SourceMode src, SizeVal sz,
+class IForm<bits<4> opcode, DestMode ad, bit bw, SourceMode as, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, sz, DoubleOpFrm, asmstr> {
+ : MSP430Inst<outs, ins, size, asmstr> {
let Pattern = pattern;
- DestMode ad = dest;
- SourceMode as = src;
-
- let Inst{12-15} = opcode;
+ bits<4> rs;
+ bits<4> rd;
+
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = rs;
let Inst{7} = ad.Value;
let Inst{6} = bw;
- let Inst{4-5} = as.Value;
+ let Inst{5-4} = as.Value;
+ let Inst{3-0} = rd;
}
// 8 bit IForm instructions
-class IForm8<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+class IForm8<bits<4> opcode, DestMode dest, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm<opcode, dest, 1, src, sz, outs, ins, asmstr, pattern>;
+ : IForm<opcode, dest, 1, src, size, outs, ins, asmstr, pattern>;
class I8rr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstReg, SrcReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+}
class I8ri<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstReg, SrcImm, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ let Inst{31-16} = imm;
+ let rs = 0b0000;
+}
+
+class I8rc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<4> rd;
+
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstReg.Value;
+ let Inst{6} = 1;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = rd;
+}
class I8rm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstReg, SrcMem, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
+
+class I8rn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcIndReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
+
+class I8rp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcPostInc, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
class I8mr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstMem, SrcReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
class I8mi<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstMem, SrcImm, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ bits<20> dst;
+ let rs = 0b0000;
+ let Inst{31-16} = imm;
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I8mc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 4, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<20> dst;
+
+ let Inst{31-16} = dst{19-4};
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstMem.Value;
+ let Inst{6} = 1;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = dst{3-0};
+}
class I8mm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstMem, SrcMem, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ bits<20> dst;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I8mn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcIndReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
+
+class I8mp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcPostInc, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
// 16 bit IForm instructions
-class IForm16<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+class IForm16<bits<4> opcode, DestMode dest, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm<opcode, dest, 0, src, sz, outs, ins, asmstr, pattern>;
+ : IForm<opcode, dest, 0, src, size, outs, ins, asmstr, pattern>;
class I16rr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstReg, SrcReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+}
class I16ri<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstReg, SrcImm, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ let Inst{31-16} = imm;
+ let rs = 0b0000;
+}
+
+class I16rc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<4> rd;
+
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstReg.Value;
+ let Inst{6} = 0;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = rd;
+}
class I16rm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstReg, SrcMem, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
+
+class I16rn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcIndReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
+
+class I16rp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcPostInc, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
class I16mr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstMem, SrcReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
class I16mi<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstMem, SrcImm, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ bits<20> dst;
+ let Inst{31-16} = imm;
+ let rs = 0b0000;
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I16mc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 4, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<20> dst;
+
+ let Inst{31-16} = dst{19-4};
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstMem.Value;
+ let Inst{6} = 0;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = dst{3-0};
+}
class I16mm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstMem, SrcMem, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ bits<20> dst;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I16mn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcIndReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
+
+class I16mp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcPostInc, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
// MSP430 Single Operand (Format II) Instructions
-class IIForm<bits<9> opcode, bit bw, SourceMode src, SizeVal sz,
+class IIForm<bits<3> opcode, bit bw, SourceMode as, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, sz, SingleOpFrm, asmstr> {
+ : MSP430Inst<outs, ins, size, asmstr> {
let Pattern = pattern;
-
- SourceMode as = src;
- let Inst{7-15} = opcode;
- let Inst{6} = bw;
- let Inst{4-5} = as.Value;
+ bits<4> rs;
+
+ let Inst{15-10} = 0b000100;
+ let Inst{9-7} = opcode;
+ let Inst{6} = bw;
+ let Inst{5-4} = as.Value;
+ let Inst{3-0} = rs;
}
// 8 bit IIForm instructions
-class IIForm8<bits<9> opcode, SourceMode src, SizeVal sz,
+class IIForm8<bits<3> opcode, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm<opcode, 1, src, sz, outs, ins, asmstr, pattern>;
+ : IIForm<opcode, 1, src, size, outs, ins, asmstr, pattern>;
+
+class II8r<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcReg, 2, outs, ins, asmstr, pattern>;
-class II8r<bits<9> opcode,
+class II8m<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm8<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IIForm8<opcode, SrcMem, 4, outs, ins, asmstr, pattern> {
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
-class II8m<bits<9> opcode,
+class II8i<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm8<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IIForm8<opcode, SrcImm, 4, outs, ins, asmstr, pattern> {
+ bits<16> imm;
+ let rs = 0b0000;
+ let Inst{31-16} = imm;
+}
-class II8i<bits<9> opcode,
+class II8c<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm8<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let Pattern = pattern;
+
+ bits<6> imm;
+
+ let Inst{15-10} = 0b000100;
+ let Inst{9-7} = opcode;
+ let Inst{6} = 1;
+ let Inst{5-0} = imm;
+}
+
+class II8n<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcIndReg, 2, outs, ins, asmstr, pattern>;
+
+class II8p<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcPostInc, 2, outs, ins, asmstr, pattern>;
// 16 bit IIForm instructions
-class IIForm16<bits<9> opcode, SourceMode src, SizeVal sz,
+class IIForm16<bits<3> opcode, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm<opcode, 0, src, sz, outs, ins, asmstr, pattern>;
+ : IIForm<opcode, 0, src, size, outs, ins, asmstr, pattern>;
-class II16r<bits<9> opcode,
+class II16r<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm16<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IIForm16<opcode, SrcReg, 2, outs, ins, asmstr, pattern>;
-class II16m<bits<9> opcode,
+class II16m<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm16<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IIForm16<opcode, SrcMem, 4, outs, ins, asmstr, pattern> {
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
-class II16i<bits<9> opcode,
+class II16i<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm16<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IIForm16<opcode, SrcImm, 4, outs, ins, asmstr, pattern> {
+ bits<16> imm;
+ let rs = 0b0000;
+ let Inst{31-16} = imm;
+}
+
+class II16c<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let Pattern = pattern;
+
+ bits<6> imm;
+
+ let Inst{15-10} = 0b000100;
+ let Inst{9-7} = opcode;
+ let Inst{6} = 0;
+ let Inst{5-0} = imm;
+}
+
+class II16n<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcIndReg, 2, outs, ins, asmstr, pattern>;
+
+class II16p<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcPostInc, 2, outs, ins, asmstr, pattern>;
// MSP430 Conditional Jumps Instructions
-class CJForm<bits<3> opcode, bits<3> cond,
- dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, Size2Bytes, CondJumpFrm, asmstr> {
+class CJForm<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
let Pattern = pattern;
- let Inst{13-15} = opcode;
- let Inst{10-12} = cond;
+ bits<3> cond;
+ bits<10> dst;
+
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = cond;
+ let Inst{9-0} = dst;
}
// Pseudo instructions
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, SizeSpecial, PseudoFrm, asmstr> {
+ : MSP430Inst<outs, ins, 0, asmstr> {
let Pattern = pattern;
- let Inst{15-0} = 0;
}
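
Editorial aside: the reworked IForm class above pins down the Format-I bit layout (opcode in 15-12, rs in 11-8, Ad in 7, B/W in 6, As in 5-4, rd in 3-0), which is what makes the later switch to Desc.getSize() and a table-driven disassembler possible. The C helper below is a rough sketch of packing one such base word for illustration; it is not the target's MCCodeEmitter.

/* Rough sketch (illustration only, not the MCCodeEmitter) of the Format-I
 * base word laid out by the IForm class above:
 *   [15:12] opcode   [11:8] rs   [7] Ad   [6] B/W   [5:4] As   [3:0] rd
 * e.g. "mov r5, r6" ~ encode_iform(0x4, 5, 0, 0, 0, 6) == 0x4506 */
#include <stdint.h>

static uint16_t encode_iform(unsigned opcode, unsigned rs, unsigned ad,
                             unsigned bw, unsigned as, unsigned rd) {
    return (uint16_t)(((opcode & 0xFu) << 12) | ((rs & 0xFu) << 8) |
                      ((ad & 1u) << 7) | ((bw & 1u) << 6) |
                      ((as & 3u) << 4) | (rd & 0xFu));
}
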
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index dd1b30a3e470..c136933a51bc 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -301,35 +301,20 @@ unsigned MSP430InstrInfo::insertBranch(MachineBasicBlock &MBB,
unsigned MSP430InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MCInstrDesc &Desc = MI.getDesc();
- switch (Desc.TSFlags & MSP430II::SizeMask) {
- default:
- switch (Desc.getOpcode()) {
- default: llvm_unreachable("Unknown instruction size!");
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::DBG_VALUE:
- return 0;
- case TargetOpcode::INLINEASM: {
- const MachineFunction *MF = MI.getParent()->getParent();
- const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
- return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
- *MF->getTarget().getMCAsmInfo());
- }
- }
- case MSP430II::SizeSpecial:
- switch (MI.getOpcode()) {
- default: llvm_unreachable("Unknown instruction size!");
- case MSP430::SAR8r1c:
- case MSP430::SAR16r1c:
- return 4;
- }
- case MSP430II::Size2Bytes:
- return 2;
- case MSP430II::Size4Bytes:
- return 4;
- case MSP430II::Size6Bytes:
- return 6;
+ switch (Desc.getOpcode()) {
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::INLINEASM: {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+ return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
+ *MF->getTarget().getMCAsmInfo());
}
+ }
+
+ return Desc.getSize();
}
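
Editorial aside: with the size flags gone, getInstSizeInBytes now trusts the Size field set by the format classes in MSP430InstrFormats.td. As an assumption-labeled rule of thumb (not the patch's logic), those sizes follow "2-byte base word plus one 2-byte extension word per immediate or indexed x(Rn) operand", with constant-generator immediates (#0, #1, #2, #4, #8, #-1) needing no extension word; the sketch below just restates that arithmetic.

/* Illustration only: the per-format sizes above (rr/rc = 2, ri/rm/mr/mc = 2 or 4,
 * mi/mm = 6) reduce to a base word plus extension words. */
#include <stdbool.h>

static unsigned msp430_format1_size(bool src_needs_ext_word,
                                    bool dst_needs_ext_word) {
    return 2u + (src_needs_ext_word ? 2u : 0u) + (dst_needs_ext_word ? 2u : 0u);
}
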
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index 45357f54c9c6..fee3bea9b8d6 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -24,22 +24,6 @@ namespace llvm {
class MSP430Subtarget;
-/// MSP430II - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace MSP430II {
- enum {
- SizeShift = 2,
- SizeMask = 7 << SizeShift,
-
- SizeUnknown = 0 << SizeShift,
- SizeSpecial = 1 << SizeShift,
- Size2Bytes = 2 << SizeShift,
- Size4Bytes = 3 << SizeShift,
- Size6Bytes = 4 << SizeShift
- };
-}
-
class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
virtual void anchor();
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
index cec43040f60d..25c81d94f75b 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
@@ -34,8 +34,9 @@ def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
-def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
- SDTCisI8<2>]>;
+def SDT_MSP430DAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<0>]>;
//===----------------------------------------------------------------------===//
// MSP430 Specific Node Definitions.
@@ -48,6 +49,7 @@ def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
+def MSP430rrcl : SDNode<"MSP430ISD::RRCL", SDTIntUnaryOp, []>;
def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
@@ -63,33 +65,88 @@ def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
[SDNPHasChain, SDNPInGlue]>;
def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
[SDNPInGlue]>;
-def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
-def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
-def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
+def MSP430dadd : SDNode<"MSP430ISD::DADD", SDT_MSP430DAdd, []>;
//===----------------------------------------------------------------------===//
// MSP430 Operand Definitions.
//===----------------------------------------------------------------------===//
+def MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+}
+
// Address operands
def memsrc : Operand<i16> {
let PrintMethod = "printSrcMemOperand";
let MIOperandInfo = (ops GR16, i16imm);
+ let ParserMatchClass = MemAsmOperand;
+ let EncoderMethod = "getMemOpValue";
+ let DecoderMethod = "DecodeMemOperand";
}
def memdst : Operand<i16> {
let PrintMethod = "printSrcMemOperand";
let MIOperandInfo = (ops GR16, i16imm);
+ let ParserMatchClass = MemAsmOperand;
+ let EncoderMethod = "getMemOpValue";
+ let DecoderMethod = "DecodeMemOperand";
+}
+
+def IndRegAsmOperand : AsmOperandClass {
+ let Name = "IndReg";
+ let RenderMethod = "addRegOperands";
+}
+
+def indreg : Operand<i16> {
+ let PrintMethod = "printIndRegOperand";
+ let MIOperandInfo = (ops GR16);
+ let ParserMatchClass = IndRegAsmOperand;
+ let DecoderMethod = "DecodeGR16RegisterClass";
+}
+
+def PostIndRegAsmOperand : AsmOperandClass {
+ let Name = "PostIndReg";
+ let RenderMethod = "addRegOperands";
+}
+
+def postreg : Operand<i16> {
+ let PrintMethod = "printPostIndRegOperand";
+ let MIOperandInfo = (ops GR16);
+ let ParserMatchClass = PostIndRegAsmOperand;
+ let DecoderMethod = "DecodeGR16RegisterClass";
}
// Short jump targets have OtherVT type and are printed as pcrel imm values.
def jmptarget : Operand<OtherVT> {
let PrintMethod = "printPCRelImmOperand";
+ let EncoderMethod = "getPCRelImmOpValue";
}
// Operand for printing out a condition code.
def cc : Operand<i8> {
let PrintMethod = "printCCOperand";
+ let EncoderMethod = "getCCOpValue";
+}
+
+def CGImmAsmOperand : AsmOperandClass {
+ let Name = "CGImm";
+ let RenderMethod = "addImmOperands";
+}
+
+def cg8imm : Operand<i8>,
+ ImmLeaf<i8, [{return Imm == 0 || Imm == 1 || Imm == 2 ||
+ Imm == 4 || Imm == 8 || Imm == -1;}]> {
+ let ParserMatchClass = CGImmAsmOperand;
+ let EncoderMethod = "getCGImmOpValue";
+ let DecoderMethod = "DecodeCGImm";
+}
+
+def cg16imm : Operand<i16>,
+ ImmLeaf<i16, [{return Imm == 0 || Imm == 1 || Imm == 2 ||
+ Imm == 4 || Imm == 8 || Imm == -1;}]> {
+ let ParserMatchClass = CGImmAsmOperand;
+ let EncoderMethod = "getCGImmOpValue";
+ let DecoderMethod = "DecodeCGImm";
}
//===----------------------------------------------------------------------===//
@@ -102,6 +159,7 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
// Pattern Fragments
def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
+def bic : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, (not node:$rhs))>;
def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
return N->hasOneUse();
}]>;
@@ -113,21 +171,21 @@ def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber SR.
-let Defs = [SP, SR], Uses = [SP] in {
+let isCodeGenOnly = 1, Defs = [SP, SR], Uses = [SP] in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
- "#ADJCALLSTACKDOWN",
+ "#ADJCALLSTACKDOWN $amt1 $amt2",
[(MSP430callseq_start timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
- "#ADJCALLSTACKUP",
+ "#ADJCALLSTACKUP $amt1 $amt2",
[(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
}
-let Defs = [SR], Uses = [SP] in {
+let isCodeGenOnly = 1, Defs = [SR], Uses = [SP] in {
def ADDframe : Pseudo<(outs GR16:$dst), (ins i16imm:$base, i16imm:$offset),
"# ADDframe PSEUDO", []>;
}
-let usesCustomInserter = 1 in {
+let isCodeGenOnly = 1, usesCustomInserter = 1 in {
let Uses = [SR] in {
def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
"# Select8 PSEUDO",
@@ -141,38 +199,44 @@ let usesCustomInserter = 1 in {
let Defs = [SR] in {
def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Shl8 PSEUDO",
- [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
+ [(set GR8:$dst, (shl GR8:$src, GR8:$cnt))]>;
def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
"# Shl16 PSEUDO",
- [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>;
+ [(set GR16:$dst, (shl GR16:$src, GR8:$cnt))]>;
def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Sra8 PSEUDO",
- [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>;
+ [(set GR8:$dst, (sra GR8:$src, GR8:$cnt))]>;
def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
"# Sra16 PSEUDO",
- [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>;
+ [(set GR16:$dst, (sra GR16:$src, GR8:$cnt))]>;
def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Srl8 PSEUDO",
- [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>;
+ [(set GR8:$dst, (srl GR8:$src, GR8:$cnt))]>;
def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
"# Srl16 PSEUDO",
- [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>;
-
+ [(set GR16:$dst, (srl GR16:$src, GR8:$cnt))]>;
+ def Rrcl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src), "",
+ [(set GR8:$dst, (MSP430rrcl GR8:$src))]>;
+ def Rrcl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src), "",
+ [(set GR16:$dst, (MSP430rrcl GR16:$src))]>;
}
}
-let hasSideEffects = 0 in
-def NOP : Pseudo<(outs), (ins), "nop", []>;
-
//===----------------------------------------------------------------------===//
// Control Flow Instructions...
//
-// FIXME: Provide proper encoding!
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def RET : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs), (ins), "ret", [(MSP430retflag)]>;
- def RETI : II16r<0x0, (outs), (ins), "reti", [(MSP430retiflag)]>;
+ def RET : IForm16<0b0100, DstReg, SrcPostInc, 2,
+ (outs), (ins), "ret", [(MSP430retflag)]> {
+ let DecoderNamespace = "Delta";
+ let rs = 1;
+ let rd = 0;
+ }
+ def RETI : IIForm16<0b110, SrcReg, 2,
+ (outs), (ins), "reti", [(MSP430retiflag)]> {
+ let rs = 0;
+ }
}
let isBranch = 1, isTerminator = 1 in {
@@ -182,120 +246,143 @@ let isBranch = 1, isTerminator = 1 in {
// Direct branch
let isBarrier = 1 in {
// Short branch
- def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst),
+ def JMP : CJForm<(outs), (ins jmptarget:$dst),
"jmp\t$dst",
- [(br bb:$dst)]>;
- let isIndirectBranch = 1 in {
+ [(br bb:$dst)]> {
+ let cond = 0b111;
+ }
+ let isIndirectBranch = 1, rd = 0 in {
// Long branches
- def Bi : I16ri<0, (outs), (ins i16imm:$brdst),
- "br\t$brdst",
- [(brind tblockaddress:$brdst)]>;
- def Br : I16rr<0, (outs), (ins GR16:$brdst),
- "br\t$brdst",
- [(brind GR16:$brdst)]>;
- def Bm : I16rm<0, (outs), (ins memsrc:$brdst),
- "br\t$brdst",
- [(brind (load addr:$brdst))]>;
+ def Bi : I16ri<0b0100, (outs), (ins i16imm:$imm),
+ "br\t$imm",
+ [(brind tblockaddress:$imm)]>;
+ def Br : I16rr<0b0100, (outs), (ins GR16:$rs),
+ "br\t$rs",
+ [(brind GR16:$rs)]>;
+ def Bm : I16rm<0b0100, (outs), (ins memsrc:$src),
+ "br\t$src",
+ [(brind (load addr:$src))]>;
}
}
// Conditional branches
let Uses = [SR] in
- def JCC : CJForm<0, 0,
- (outs), (ins jmptarget:$dst, cc:$cc),
- "j$cc\t$dst",
- [(MSP430brcc bb:$dst, imm:$cc)]>;
+ def JCC : CJForm<(outs), (ins jmptarget:$dst, cc:$cond),
+ "j$cond\t$dst",
+ [(MSP430brcc bb:$dst, imm:$cond)]>;
} // isBranch, isTerminator
//===----------------------------------------------------------------------===//
// Call Instructions...
//
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. SPW is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [R11, R12, R13, R14, R15, SR],
- Uses = [SP] in {
- def CALLi : II16i<0x0,
- (outs), (ins i16imm:$dst),
- "call\t$dst", [(MSP430call imm:$dst)]>;
- def CALLr : II16r<0x0,
- (outs), (ins GR16:$dst),
- "call\t$dst", [(MSP430call GR16:$dst)]>;
- def CALLm : II16m<0x0,
- (outs), (ins memsrc:$dst),
- "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>;
- }
-
+// All calls clobber the non-callee saved registers. SPW is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead. Uses for argument
+// registers are added manually.
+let isCall = 1,
+ Defs = [R11, R12, R13, R14, R15, SR],
+ Uses = [SP] in {
+ def CALLi : II16i<0b101,
+ (outs), (ins i16imm:$imm),
+ "call\t$imm", [(MSP430call imm:$imm)]>;
+ def CALLr : II16r<0b101,
+ (outs), (ins GR16:$rs),
+ "call\t$rs", [(MSP430call GR16:$rs)]>;
+ def CALLm : II16m<0b101,
+ (outs), (ins memsrc:$src),
+ "call\t$src", [(MSP430call (load addr:$src))]>;
+ def CALLn : II16n<0b101, (outs), (ins indreg:$rs), "call\t$rs", []>;
+ def CALLp : II16p<0b101, (outs), (ins postreg:$rs), "call\t$rs", []>;
+}
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions...
//
-let Defs = [SP], Uses = [SP], hasSideEffects=0 in {
+let Defs = [SP], Uses = [SP], hasSideEffects = 0 in {
let mayLoad = 1 in
-def POP16r : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$reg), (ins), "pop.w\t$reg", []>;
+def POP16r : IForm16<0b0100, DstReg, SrcPostInc, 2,
+ (outs GR16:$rd), (ins), "pop\t$rd", []> {
+ let DecoderNamespace = "Delta";
+ let rs = 1;
+}
let mayStore = 1 in
-def PUSH16r : II16r<0x0,
- (outs), (ins GR16:$reg), "push.w\t$reg",[]>;
+def PUSH8r : II8r<0b100, (outs), (ins GR8:$rs), "push.b\t$rs", []>;
+def PUSH16r : II16r<0b100, (outs), (ins GR16:$rs), "push\t$rs", []>;
+def PUSH16c : II16c<0b100, (outs), (ins cg16imm:$imm), "push\t$imm", []>;
+def PUSH16i : II16i<0b100, (outs), (ins i16imm:$imm), "push\t$imm", []>;
}
//===----------------------------------------------------------------------===//
// Move Instructions
-// FIXME: Provide proper encoding!
let hasSideEffects = 0 in {
-def MOV8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src),
- "mov.b\t{$src, $dst}",
+def MOV8rr : I8rr<0b0100,
+ (outs GR8:$rd), (ins GR8:$rs),
+ "mov.b\t{$rs, $rd}",
[]>;
-def MOV16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "mov.w\t{$src, $dst}",
+def MOV16rr : I16rr<0b0100,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "mov\t{$rs, $rd}",
[]>;
}
-// FIXME: Provide proper encoding!
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def MOV8ri : I8ri<0x0,
- (outs GR8:$dst), (ins i8imm:$src),
- "mov.b\t{$src, $dst}",
- [(set GR8:$dst, imm:$src)]>;
-def MOV16ri : I16ri<0x0,
- (outs GR16:$dst), (ins i16imm:$src),
- "mov.w\t{$src, $dst}",
- [(set GR16:$dst, imm:$src)]>;
+def MOV8rc : I8rc<0b0100,
+ (outs GR8:$rd), (ins cg8imm:$imm),
+ "mov.b\t$imm, $rd",
+ [(set GR8:$rd, cg8imm:$imm)]>;
+def MOV16rc : I16rc<0b0100,
+ (outs GR16:$rd), (ins cg16imm:$imm),
+ "mov\t$imm, $rd",
+ [(set GR16:$rd, cg16imm:$imm)]>;
+def MOV8ri : I8ri<0b0100,
+ (outs GR8:$rd), (ins i8imm:$imm),
+ "mov.b\t{$imm, $rd}",
+ [(set GR8:$rd, imm:$imm)]>;
+def MOV16ri : I16ri<0b0100,
+ (outs GR16:$rd), (ins i16imm:$imm),
+ "mov\t{$imm, $rd}",
+ [(set GR16:$rd, imm:$imm)]>;
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def MOV8rm : I8rm<0x0,
- (outs GR8:$dst), (ins memsrc:$src),
- "mov.b\t{$src, $dst}",
- [(set GR8:$dst, (load addr:$src))]>;
-def MOV16rm : I16rm<0x0,
- (outs GR16:$dst), (ins memsrc:$src),
- "mov.w\t{$src, $dst}",
- [(set GR16:$dst, (load addr:$src))]>;
+def MOV8rm : I8rm<0b0100,
+ (outs GR8:$rd), (ins memsrc:$src),
+ "mov.b\t{$src, $rd}",
+ [(set GR8:$rd, (load addr:$src))]>;
+def MOV16rm : I16rm<0b0100,
+ (outs GR16:$rd), (ins memsrc:$src),
+ "mov\t{$src, $rd}",
+ [(set GR16:$rd, (load addr:$src))]>;
+def MOV8rn : I8rn<0b0100,
+ (outs GR8:$rd), (ins indreg:$rs),
+ "mov.b\t{$rs, $rd}",
+ [(set GR8:$rd, (load addr:$rs))]>;
+def MOV16rn : I16rn<0b0100,
+ (outs GR16:$rd), (ins indreg:$rs),
+ "mov\t{$rs, $rd}",
+ [(set GR16:$rd, (load addr:$rs))]>;
+}
+
+let isCodeGenOnly = 1 in {
+def MOVZX16rr8 : I8rr<0b0100,
+ (outs GR16:$rd), (ins GR8:$rs),
+ "mov.b\t{$rs, $rd}",
+ [(set GR16:$rd, (zext GR8:$rs))]>;
+def MOVZX16rm8 : I8rm<0b0100,
+ (outs GR16:$rd), (ins memsrc:$src),
+ "mov.b\t{$src, $rd}",
+ [(set GR16:$rd, (zextloadi16i8 addr:$src))]>;
}
-def MOVZX16rr8 : I8rr<0x0,
- (outs GR16:$dst), (ins GR8:$src),
- "mov.b\t{$src, $dst}",
- [(set GR16:$dst, (zext GR8:$src))]>;
-def MOVZX16rm8 : I8rm<0x0,
- (outs GR16:$dst), (ins memsrc:$src),
- "mov.b\t{$src, $dst}",
- [(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
-
-let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in {
-def MOV8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb), (ins GR16:$base),
- "mov.b\t{@$base+, $dst}", []>;
-def MOV16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb), (ins GR16:$base),
- "mov.w\t{@$base+, $dst}", []>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$rs = $wb" in {
+def MOV8rp : I8rp<0b0100,
+ (outs GR8:$rd, GR16:$wb), (ins postreg:$rs),
+ "mov.b\t{$rs, $rd}", []>;
+def MOV16rp : I16rp<0b0100,
+ (outs GR16:$rd, GR16:$wb), (ins postreg:$rs),
+ "mov\t{$rs, $rd}", []>;
}
// Any instruction that defines a 8-bit result leaves the high half of the
@@ -313,821 +400,557 @@ def def8 : PatLeaf<(i8 GR8:$src), [{
def : Pat<(i16 (zext def8:$src)),
(SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
-def MOV8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "mov.b\t{$src, $dst}",
- [(store (i8 imm:$src), addr:$dst)]>;
-def MOV16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "mov.w\t{$src, $dst}",
- [(store (i16 imm:$src), addr:$dst)]>;
-
-def MOV8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "mov.b\t{$src, $dst}",
- [(store GR8:$src, addr:$dst)]>;
-def MOV16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "mov.w\t{$src, $dst}",
- [(store GR16:$src, addr:$dst)]>;
-
-def MOV8mm : I8mm<0x0,
+def MOV8mc : I8mc<0b0100,
+ (outs), (ins memdst:$dst, cg8imm:$imm),
+ "mov.b\t{$imm, $dst}",
+ [(store (i8 cg8imm:$imm), addr:$dst)]>;
+def MOV16mc : I16mc<0b0100,
+ (outs), (ins memdst:$dst, cg16imm:$imm),
+ "mov\t{$imm, $dst}",
+ [(store (i16 cg16imm:$imm), addr:$dst)]>;
+
+def MOV8mi : I8mi<0b0100,
+ (outs), (ins memdst:$dst, i8imm:$imm),
+ "mov.b\t{$imm, $dst}",
+ [(store (i8 imm:$imm), addr:$dst)]>;
+def MOV16mi : I16mi<0b0100,
+ (outs), (ins memdst:$dst, i16imm:$imm),
+ "mov\t{$imm, $dst}",
+ [(store (i16 imm:$imm), addr:$dst)]>;
+
+def MOV8mr : I8mr<0b0100,
+ (outs), (ins memdst:$dst, GR8:$rs),
+ "mov.b\t{$rs, $dst}",
+ [(store GR8:$rs, addr:$dst)]>;
+def MOV16mr : I16mr<0b0100,
+ (outs), (ins memdst:$dst, GR16:$rs),
+ "mov\t{$rs, $dst}",
+ [(store GR16:$rs, addr:$dst)]>;
+
+def MOV8mm : I8mm<0b0100,
(outs), (ins memdst:$dst, memsrc:$src),
"mov.b\t{$src, $dst}",
[(store (i8 (load addr:$src)), addr:$dst)]>;
-def MOV16mm : I16mm<0x0,
+def MOV16mm : I16mm<0b0100,
(outs), (ins memdst:$dst, memsrc:$src),
- "mov.w\t{$src, $dst}",
+ "mov\t{$src, $dst}",
[(store (i16 (load addr:$src)), addr:$dst)]>;
+def MOV8mn : I8mn<0b0100, (outs), (ins memdst:$dst, indreg:$rs),
+ "mov.b\t{$rs, $dst}", []>;
+def MOV16mn : I16mn<0b0100, (outs), (ins memdst:$dst, indreg:$rs),
+ "mov\t{$rs, $dst}", []>;
+
//===----------------------------------------------------------------------===//
// Arithmetic Instructions
-let Constraints = "$src = $dst" in {
-
-let Defs = [SR] in {
-
-let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
-
-def ADD8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src, GR8:$src2)),
+multiclass Arith<bits<4> opcode, string asmstring, SDNode node,
+ bit commutes, list<Register> uses> {
+ let Defs = [SR], Uses = uses in {
+ let Constraints = "$src2 = $rd" in {
+ let isCommutable = commutes in {
+ def 8rr : I8rr<opcode, (outs GR8:$rd), (ins GR8:$src2, GR8:$rs),
+ !strconcat(asmstring, ".b\t$rs, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, GR8:$rs)),
+ (implicit SR)]>;
+ def 16rr : I16rr<opcode, (outs GR16:$rd), (ins GR16:$src2, GR16:$rs),
+ !strconcat(asmstring, "\t$rs, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, GR16:$rs)),
(implicit SR)]>;
-def ADD16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-}
-
-def ADD8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
+ }
+ def 8rm : I8rm<opcode, (outs GR8:$rd), (ins GR8:$src2, memsrc:$src),
+ !strconcat(asmstring, ".b\t$src, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, (load addr:$src))),
+ (implicit SR)]>;
+ def 16rm : I16rm<opcode, (outs GR16:$rd), (ins GR16:$src2, memsrc:$src),
+ !strconcat(asmstring, "\t$src, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, (load addr:$src))),
(implicit SR)]>;
-def ADD16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
-
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "add.b\t{@$base+, $dst}", []>;
-def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "add.w\t{@$base+, $dst}", []>;
-}
-
-
-def ADD8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src, imm:$src2)),
+ def 8rn : I8rn<opcode, (outs GR8:$rd), (ins GR8:$src2, indreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $rd"), []>;
+ def 16rn : I16rn<opcode, (outs GR16:$rd), (ins GR16:$src2, indreg:$rs),
+ !strconcat(asmstring, "\t$rs, $rd"), []>;
+ let mayLoad = 1,
+ hasExtraDefRegAllocReq = 1,
+ Constraints = "$rs = $wb, $src2 = $rd" in {
+ def 8rp : I8rp<opcode, (outs GR8:$rd, GR16:$wb), (ins GR8:$src2, postreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $rd"), []>;
+ def 16rp : I16rp<opcode, (outs GR16:$rd, GR16:$wb), (ins GR16:$src2, postreg:$rs),
+ !strconcat(asmstring, "\t$rs, $rd"), []>;
+ }
+ def 8rc : I8rc<opcode, (outs GR8:$rd), (ins GR8:$src2, cg8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, cg8imm:$imm)),
+ (implicit SR)]>;
+ def 16rc : I16rc<opcode, (outs GR16:$rd), (ins GR16:$src2, cg16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, cg16imm:$imm)),
+ (implicit SR)]>;
+ def 8ri : I8ri<opcode, (outs GR8:$rd), (ins GR8:$src2, i8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, imm:$imm)),
+ (implicit SR)]>;
+ def 16ri : I16ri<opcode, (outs GR16:$rd), (ins GR16:$src2, i16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, imm:$imm)),
+ (implicit SR)]>;
+ }
+ def 8mr : I8mr<opcode, (outs), (ins memdst:$dst, GR8:$rs),
+ !strconcat(asmstring, ".b\t$rs, $dst"),
+ [(store (node (load addr:$dst), GR8:$rs), addr:$dst),
+ (implicit SR)]>;
+ def 16mr : I16mr<opcode, (outs), (ins memdst:$dst, GR16:$rs),
+ !strconcat(asmstring, "\t$rs, $dst"),
+ [(store (node (load addr:$dst), GR16:$rs), addr:$dst),
(implicit SR)]>;
-def ADD16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-let Constraints = "" in {
-def ADD8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "add.b\t{$src, $dst}",
- [(store (add (load addr:$dst), GR8:$src), addr:$dst),
+ def 8mc : I8mc<opcode, (outs), (ins memdst:$dst, cg8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i8 cg8imm:$imm)), addr:$dst),
+ (implicit SR)]>;
+ def 16mc : I16mc<opcode, (outs), (ins memdst:$dst, cg16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i16 cg16imm:$imm)), addr:$dst),
(implicit SR)]>;
-def ADD16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "add.w\t{$src, $dst}",
- [(store (add (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
-
-def ADD8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "add.b\t{$src, $dst}",
- [(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ def 8mi : I8mi<opcode, (outs), (ins memdst:$dst, i8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i8 imm:$imm)), addr:$dst),
+ (implicit SR)]>;
+ def 16mi : I16mi<opcode, (outs), (ins memdst:$dst, i16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i16 imm:$imm)), addr:$dst),
(implicit SR)]>;
-def ADD16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "add.w\t{$src, $dst}",
- [(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
-
-def ADD8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "add.b\t{$src, $dst}",
- [(store (add (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+ def 8mm : I8mm<opcode, (outs), (ins memdst:$dst, memsrc:$src),
+ !strconcat(asmstring, ".b\t$src, $dst"),
+ [(store (node (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SR)]>;
+ def 16mm : I16mm<opcode, (outs), (ins memdst:$dst, memsrc:$src),
+ !strconcat(asmstring, "\t$src, $dst"),
+ [(store (node (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
(implicit SR)]>;
-def ADD16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "add.w\t{$src, $dst}",
- [(store (add (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
+ def 8mn : I8mn<opcode, (outs), (ins memdst:$dst, indreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $dst"), []>;
+ def 16mn : I16mn<opcode, (outs), (ins memdst:$dst, indreg:$rs),
+ !strconcat(asmstring, "\t$rs, $dst"), []>;
+ def 8mp : I8mp<opcode, (outs), (ins memdst:$dst, postreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $dst"), []>;
+ def 16mp : I16mp<opcode, (outs), (ins memdst:$dst, postreg:$rs),
+ !strconcat(asmstring, "\t$rs, $dst"), []>;
+ }
}
-let Uses = [SR] in {
-
-let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
-def ADC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def ADC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-} // isCommutable
-
-def ADC8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src, imm:$src2)),
- (implicit SR)]>;
-def ADC16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-def ADC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
- (implicit SR)]>;
-def ADC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
+defm ADD : Arith<0b0101, "add", add, 1, []>;
+defm ADDC : Arith<0b0110, "addc", adde, 1, [SR]>;
+defm AND : Arith<0b1111, "and", and, 1, []>;
+defm BIS : Arith<0b1101, "bis", or, 1, []>;
+defm BIC : Arith<0b1100, "bic", bic, 0, []>;
+defm XOR : Arith<0b1110, "xor", xor, 1, []>;
+defm SUB : Arith<0b1000, "sub", sub, 0, []>;
+defm SUBC : Arith<0b0111, "subc", sube, 0, [SR]>;
+defm DADD : Arith<0b1010, "dadd", MSP430dadd, 1, [SR]>;
+
+def ADC8r : InstAlias<"adc.b\t$dst", (ADDC8rc GR8:$dst, 0)>;
+def ADC16r : InstAlias<"adc\t$dst", (ADDC16rc GR16:$dst, 0)>;
+def ADC8m : InstAlias<"adc.b\t$dst", (ADDC8mc memdst:$dst, 0)>;
+def ADC16m : InstAlias<"adc\t$dst", (ADDC16mc memdst:$dst, 0)>;
+
+def DADC8r : InstAlias<"dadc.b\t$dst", (DADD8rc GR8:$dst, 0)>;
+def DADC16r : InstAlias<"dadc\t$dst", (DADD16rc GR16:$dst, 0)>;
+def DADC8m : InstAlias<"dadc.b\t$dst", (DADD8mc memdst:$dst, 0)>;
+def DADC16m : InstAlias<"dadc\t$dst", (DADD16mc memdst:$dst, 0)>;
+
+def DEC8r : InstAlias<"dec.b\t$dst", (SUB8rc GR8:$dst, 1)>;
+def DEC16r : InstAlias<"dec\t$dst", (SUB16rc GR16:$dst, 1)>;
+def DEC8m : InstAlias<"dec.b\t$dst", (SUB8mc memdst:$dst, 1)>;
+def DEC16m : InstAlias<"dec\t$dst", (SUB16mc memdst:$dst, 1)>;
+
+def DECD8r : InstAlias<"decd.b\t$dst", (SUB8rc GR8:$dst, 2)>;
+def DECD16r : InstAlias<"decd\t$dst", (SUB16rc GR16:$dst, 2)>;
+def DECD8m : InstAlias<"decd.b\t$dst", (SUB8mc memdst:$dst, 2)>;
+def DECD16m : InstAlias<"decd\t$dst", (SUB16mc memdst:$dst, 2)>;
+
+def INC8r : InstAlias<"inc.b\t$dst", (ADD8rc GR8:$dst, 1)>;
+def INC16r : InstAlias<"inc\t$dst", (ADD16rc GR16:$dst, 1)>;
+def INC8m : InstAlias<"inc.b\t$dst", (ADD8mc memdst:$dst, 1)>;
+def INC16m : InstAlias<"inc\t$dst", (ADD16mc memdst:$dst, 1)>;
+
+def INCD8r : InstAlias<"incd.b\t$dst", (ADD8rc GR8:$dst, 2)>;
+def INCD16r : InstAlias<"incd\t$dst", (ADD16rc GR16:$dst, 2)>;
+def INCD8m : InstAlias<"incd.b\t$dst", (ADD8mc memdst:$dst, 2)>;
+def INCD16m : InstAlias<"incd\t$dst", (ADD16mc memdst:$dst, 2)>;
+
+def SBC8r : InstAlias<"sbc.b\t$dst", (SUBC8rc GR8:$dst, 0)>;
+def SBC16r : InstAlias<"sbc\t$dst", (SUBC16rc GR16:$dst, 0)>;
+def SBC8m : InstAlias<"sbc.b\t$dst", (SUBC8mc memdst:$dst, 0)>;
+def SBC16m : InstAlias<"sbc\t$dst", (SUBC16mc memdst:$dst, 0)>;
+
+def INV8r : InstAlias<"inv.b\t$dst", (XOR8rc GR8:$dst, -1)>;
+def INV16r : InstAlias<"inv\t$dst", (XOR16rc GR16:$dst, -1)>;
+def INV8m : InstAlias<"inv.b\t$dst", (XOR8mc memdst:$dst, -1)>;
+def INV16m : InstAlias<"inv\t$dst", (XOR16mc memdst:$dst, -1)>;
+
+// printAliasInstr() doesn't check $dst operands are actually equal
+// for RLA and RLC aliases below, so disable printing aliases.
+
+def RLA8r : InstAlias<"rla.b\t$dst", (ADD8rr GR8:$dst, GR8:$dst), 0>;
+def RLA16r : InstAlias<"rla\t$dst", (ADD16rr GR16:$dst, GR16:$dst), 0>;
+def RLA8m : InstAlias<"rla.b\t$dst", (ADD8mm memdst:$dst, memdst:$dst), 0>;
+def RLA16m : InstAlias<"rla\t$dst", (ADD16mm memdst:$dst, memdst:$dst), 0>;
+
+def RLC8r : InstAlias<"rlc.b\t$dst", (ADDC8rr GR8:$dst, GR8:$dst), 0>;
+def RLC16r : InstAlias<"rlc\t$dst", (ADDC16rr GR16:$dst, GR16:$dst), 0>;
+def RLC8m : InstAlias<"rlc.b\t$dst", (ADDC8mm memdst:$dst, memdst:$dst), 0>;
+def RLC16m : InstAlias<"rlc\t$dst", (ADDC16mm memdst:$dst, memdst:$dst), 0>;
+
+def DINT : InstAlias<"dint", (BIC16rc SR, 8)>;
+def EINT : InstAlias<"eint", (BIS16rc SR, 8)>;
+
+def NOP : InstAlias<"nop", (MOV16rc CG, 0)>;
+
+def CLR8r : InstAlias<"clr.b\t$dst", (MOV8rc GR8:$dst, 0)>;
+def CLR16r : InstAlias<"clr\t$dst", (MOV16rc GR16:$dst, 0)>;
+def CLR8m : InstAlias<"clr.b\t$dst", (MOV8mc memdst:$dst, 0)>;
+def CLR16m : InstAlias<"clr\t$dst", (MOV16mc memdst:$dst, 0)>;
+
+def CLRC : InstAlias<"clrc", (BIC16rc SR, 1)>;
+def CLRN : InstAlias<"clrn", (BIC16rc SR, 4)>;
+def CLRZ : InstAlias<"clrz", (BIC16rc SR, 2)>;
+def SETC : InstAlias<"setc", (BIS16rc SR, 1)>;
+def SETN : InstAlias<"setn", (BIS16rc SR, 4)>;
+def SETZ : InstAlias<"setz", (BIS16rc SR, 2)>;
+
+def : Pat<(MSP430rla GR8:$dst), (ADD8rr $dst, $dst)>;
+def : Pat<(MSP430rla GR16:$dst), (ADD16rr $dst, $dst)>;
+
+// Format-II (Single Operand) Instruction
+// Register mode
+let Constraints = "$rs = $rd" in {
-let Constraints = "" in {
-def ADC8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "addc.b\t{$src, $dst}",
- [(store (adde (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def ADC16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "addc.w\t{$src, $dst}",
- [(store (adde (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
-
-def ADC8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "addc.b\t{$src, $dst}",
- [(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
+let Defs = [SR] in {
+def RRA8r : II8r<0b010,
+ (outs GR8:$rd), (ins GR8:$rs),
+ "rra.b\t$rd",
+ [(set GR8:$rd, (MSP430rra GR8:$rs)),
(implicit SR)]>;
-def ADC16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "addc.w\t{$src, $dst}",
- [(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
+def RRA16r : II16r<0b010,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "rra\t$rd",
+ [(set GR16:$rd, (MSP430rra GR16:$rs)),
(implicit SR)]>;
-def ADC8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "addc.b\t{$src, $dst}",
- [(store (adde (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+let Uses = [SR] in {
+def RRC8r : II8r<0b000,
+ (outs GR8:$rd), (ins GR8:$rs),
+ "rrc.b\t$rd",
+ [(set GR8:$rd, (MSP430rrc GR8:$rs)),
(implicit SR)]>;
-def ADC16mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "addc.w\t{$src, $dst}",
- [(store (adde (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
+def RRC16r : II16r<0b000,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "rrc\t$rd",
+ [(set GR16:$rd, (MSP430rrc GR16:$rs)),
(implicit SR)]>;
-}
-
} // Uses = [SR]
-let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
-def AND8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def AND16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, GR16:$src2)),
+def SEXT16r : II16r<0b011,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "sxt\t$rd",
+ [(set GR16:$rd, (sext_inreg GR16:$rs, i8)),
(implicit SR)]>;
-}
-def AND8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, imm:$src2)),
- (implicit SR)]>;
-def AND16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, imm:$src2)),
- (implicit SR)]>;
+} // Defs = [SR]
-def AND8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
- (implicit SR)]>;
-def AND16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
+let isCodeGenOnly = 1 in
+def ZEXT16r : I8rr<0b0100,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "mov.b\t{$rs, $rd}",
+ [(set GR16:$rd, (zext (trunc GR16:$rs)))]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "and.b\t{@$base+, $dst}", []>;
-def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "and.w\t{@$base+, $dst}", []>;
-}
+def SWPB16r : II16r<0b001,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "swpb\t$rd",
+ [(set GR16:$rd, (bswap GR16:$rs))]>;
-let Constraints = "" in {
-def AND8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "and.b\t{$src, $dst}",
- [(store (and (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def AND16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "and.w\t{$src, $dst}",
- [(store (and (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
+} // Constraints = "$src = $dst"
-def AND8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "and.b\t{$src, $dst}",
- [(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
+// Indexed, indirect register and indirect autoincrement modes
+let Defs = [SR] in {
+def RRA8m : II8m<0b010,
+ (outs), (ins memsrc:$src),
+ "rra.b\t$src",
+ [(store (MSP430rra (i8 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def AND16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "and.w\t{$src, $dst}",
- [(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
-
-def AND8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "and.b\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+def RRA16m : II16m<0b010,
+ (outs), (ins memsrc:$src),
+ "rra\t$src",
+ [(store (MSP430rra (i16 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def AND16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "and.w\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-}
-
-let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
-def OR8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>;
-def OR16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>;
-}
-
-def OR8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src, imm:$src2))]>;
-def OR16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src, imm:$src2))]>;
-
-def OR8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>;
-def OR16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>;
-
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "bis.b\t{@$base+, $dst}", []>;
-def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "bis.w\t{@$base+, $dst}", []>;
-}
-
-let Constraints = "" in {
-def OR8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
-def OR16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
-
-def OR8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def OR16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
-
-def OR8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (i8 (load addr:$dst)),
- (i8 (load addr:$src))), addr:$dst)]>;
-def OR16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (i16 (load addr:$dst)),
- (i16 (load addr:$src))), addr:$dst)]>;
-}
-
-// bic does not modify condition codes
-def BIC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "bic.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>;
-def BIC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "bic.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>;
-
-def BIC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "bic.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>;
-def BIC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "bic.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>;
-
-let Constraints = "" in {
-def BIC8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "bic.b\t{$src, $dst}",
- [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>;
-def BIC16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "bic.w\t{$src, $dst}",
- [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>;
-
-def BIC8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bic.b\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (not (i8 (load addr:$src)))), addr:$dst)]>;
-def BIC16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bic.w\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (not (i16 (load addr:$src)))), addr:$dst)]>;
-}
-let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
-def XOR8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def XOR16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-}
+def RRA8n : II8n<0b010, (outs), (ins indreg:$rs), "rra.b\t$rs", []>;
+def RRA16n : II16n<0b010, (outs), (ins indreg:$rs), "rra\t$rs", []>;
+def RRA8p : II8p<0b010, (outs), (ins postreg:$rs), "rra.b\t$rs", []>;
+def RRA16p : II16p<0b010, (outs), (ins postreg:$rs), "rra\t$rs", []>;
-def XOR8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src, imm:$src2)),
+let Uses = [SR] in {
+def RRC8m : II8m<0b000,
+ (outs), (ins memsrc:$src),
+ "rrc.b\t$src",
+ [(store (MSP430rrc (i8 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def XOR16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-def XOR8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
+def RRC16m : II16m<0b000,
+ (outs), (ins memsrc:$src),
+ "rrc\t$src",
+ [(store (MSP430rrc (i16 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def XOR16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "xor.b\t{@$base+, $dst}", []>;
-def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "xor.w\t{@$base+, $dst}", []>;
-}
+def RRC8n : II8n<0b000, (outs), (ins indreg:$rs), "rrc.b\t$rs", []>;
+def RRC16n : II16n<0b000, (outs), (ins indreg:$rs), "rrc\t$rs", []>;
+def RRC8p : II8p<0b000, (outs), (ins postreg:$rs), "rrc.b\t$rs", []>;
+def RRC16p : II16p<0b000, (outs), (ins postreg:$rs), "rrc\t$rs", []>;
-let Constraints = "" in {
-def XOR8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "xor.b\t{$src, $dst}",
- [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def XOR16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "xor.w\t{$src, $dst}",
- [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
-
-def XOR8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "xor.b\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SR)]>;
-def XOR16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "xor.w\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
+} // Uses = [SR]
-def XOR8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "xor.b\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-def XOR16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "xor.w\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+def SEXT16m : II16m<0b011,
+ (outs), (ins memsrc:$src),
+ "sxt\t$src",
+ [(store (sext_inreg (extloadi16i8 addr:$src), i8),
+ addr:$src),
(implicit SR)]>;
-}
+def SEXT16n : II16n<0b011, (outs), (ins indreg:$rs), "sxt\t$rs", []>;
+def SEXT16p : II16p<0b011, (outs), (ins postreg:$rs), "sxt\t$rs", []>;
+} // Defs = [SR]
-def SUB8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def SUB16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
- (implicit SR)]>;
+def SWPB16m : II16m<0b001,
+ (outs), (ins memsrc:$src),
+ "swpb\t$src",
+ [(store (bswap (i16 (load addr:$src))), addr:$src)]>;
+def SWPB16n : II16n<0b001, (outs), (ins indreg:$rs), "swpb\t$rs", []>;
+def SWPB16p : II16p<0b001, (outs), (ins postreg:$rs), "swpb\t$rs", []>;
-def SUB8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src, imm:$src2)),
+// Integer comparisons
+let Defs = [SR] in {
+def CMP8rr : I8rr<0b1001,
+ (outs), (ins GR8:$rd, GR8:$rs),
+ "cmp.b\t$rs, $rd",
+ [(MSP430cmp GR8:$rd, GR8:$rs), (implicit SR)]>;
+def CMP16rr : I16rr<0b1001,
+ (outs), (ins GR16:$rd, GR16:$rs),
+ "cmp\t$rs, $rd",
+ [(MSP430cmp GR16:$rd, GR16:$rs), (implicit SR)]>;
+
+def CMP8rc : I8rc<0b1001,
+ (outs), (ins GR8:$rd, cg8imm:$imm),
+ "cmp.b\t$imm, $rd",
+ [(MSP430cmp GR8:$rd, cg8imm:$imm), (implicit SR)]>;
+def CMP16rc : I16rc<0b1001,
+ (outs), (ins GR16:$rd, cg16imm:$imm),
+ "cmp\t$imm, $rd",
+ [(MSP430cmp GR16:$rd, cg16imm:$imm), (implicit SR)]>;
+
+def CMP8ri : I8ri<0b1001,
+ (outs), (ins GR8:$rd, i8imm:$imm),
+ "cmp.b\t$imm, $rd",
+ [(MSP430cmp GR8:$rd, imm:$imm), (implicit SR)]>;
+def CMP16ri : I16ri<0b1001,
+ (outs), (ins GR16:$rd, i16imm:$imm),
+ "cmp\t$imm, $rd",
+ [(MSP430cmp GR16:$rd, imm:$imm), (implicit SR)]>;
+
+def CMP8mc : I8mc<0b1001,
+ (outs), (ins memsrc:$dst, cg8imm:$imm),
+ "cmp.b\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst), (i8 cg8imm:$imm)),
(implicit SR)]>;
-def SUB16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src, imm:$src2)),
+def CMP16mc : I16mc<0b1001,
+ (outs), (ins memsrc:$dst, cg16imm:$imm),
+ "cmp\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst), (i16 cg16imm:$imm)),
(implicit SR)]>;
-def SUB8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
+def CMP8mi : I8mi<0b1001,
+ (outs), (ins memsrc:$dst, i8imm:$imm),
+ "cmp.b\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst),
+ (i8 imm:$imm)), (implicit SR)]>;
+def CMP16mi : I16mi<0b1001,
+ (outs), (ins memsrc:$dst, i16imm:$imm),
+ "cmp\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst),
+ (i16 imm:$imm)), (implicit SR)]>;
+
+def CMP8rm : I8rm<0b1001,
+ (outs), (ins GR8:$rd, memsrc:$src),
+ "cmp.b\t$src, $rd",
+ [(MSP430cmp GR8:$rd, (load addr:$src)),
(implicit SR)]>;
-def SUB16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
+def CMP16rm : I16rm<0b1001,
+ (outs), (ins GR16:$rd, memsrc:$src),
+ "cmp\t$src, $rd",
+ [(MSP430cmp GR16:$rd, (load addr:$src)),
(implicit SR)]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "sub.b\t{@$base+, $dst}", []>;
-def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "sub.w\t{@$base+, $dst}", []>;
-}
+def CMP8rn : I8rn<0b1001,
+ (outs), (ins GR8:$rd, indreg:$rs), "cmp.b\t$rs, $rd", []>;
+def CMP16rn : I16rn<0b1001,
+ (outs), (ins GR16:$rd, indreg:$rs), "cmp\t$rs, $rd", []>;
-let Constraints = "" in {
-def SUB8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "sub.b\t{$src, $dst}",
- [(store (sub (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def SUB16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "sub.w\t{$src, $dst}",
- [(store (sub (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
+def CMP8rp : I8rp<0b1001,
+ (outs), (ins GR8:$rd, postreg:$rs), "cmp.b\t$rs, $rd", []>;
+def CMP16rp : I16rp<0b1001,
+ (outs), (ins GR16:$rd, postreg:$rs), "cmp\t$rs, $rd", []>;
-def SUB8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "sub.b\t{$src, $dst}",
- [(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
+def CMP8mr : I8mr<0b1001,
+ (outs), (ins memsrc:$dst, GR8:$rs),
+ "cmp.b\t$rs, $dst",
+ [(MSP430cmp (load addr:$dst), GR8:$rs),
(implicit SR)]>;
-def SUB16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "sub.w\t{$src, $dst}",
- [(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
+def CMP16mr : I16mr<0b1001,
+ (outs), (ins memsrc:$dst, GR16:$rs),
+ "cmp\t$rs, $dst",
+ [(MSP430cmp (load addr:$dst), GR16:$rs),
(implicit SR)]>;
-
-def SUB8mm : I8mm<0x0,
+def CMP8mm : I8mm<0b1001,
(outs), (ins memdst:$dst, memsrc:$src),
- "sub.b\t{$src, $dst}",
- [(store (sub (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-def SUB16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "sub.w\t{$src, $dst}",
- [(store (sub (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-}
-
-let Uses = [SR] in {
-def SBC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def SBC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-
-def SBC8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src, imm:$src2)),
- (implicit SR)]>;
-def SBC16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-def SBC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
+ "cmp.b\t$src, $dst",
+ [(MSP430cmp (load addr:$dst), (i8 (load addr:$src))),
(implicit SR)]>;
-def SBC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
+def CMP16mm : I16mm<0b1001, (outs), (ins memdst:$dst, memsrc:$src),
+ "cmp\t$src, $dst",
+ [(MSP430cmp (load addr:$dst), (i16 (load addr:$src))),
(implicit SR)]>;
-let Constraints = "" in {
-def SBC8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "subc.b\t{$src, $dst}",
- [(store (sube (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def SBC16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "subc.w\t{$src, $dst}",
- [(store (sube (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
+def CMP8mn : I8mn<0b1001, (outs), (ins memsrc:$dst, indreg:$rs),
+ "cmp.b\t$rs, $dst", []>;
+def CMP16mn : I16mn<0b1001, (outs), (ins memsrc:$dst, indreg:$rs),
+ "cmp\t$rs, $dst", []>;
-def SBC8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "subc.b\t{$src, $dst}",
- [(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SR)]>;
-def SBC16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "subc.w\t{$src, $dst}",
- [(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
+def CMP8mp : I8mp<0b1001, (outs), (ins memsrc:$dst, postreg:$rs),
+ "cmp.b\t$rs, $dst", []>;
+def CMP16mp : I16mp<0b1001, (outs), (ins memsrc:$dst, postreg:$rs),
+ "cmp\t$rs, $dst", []>;
-def SBC8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "subc.b\t{$src, $dst}",
- [(store (sube (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+// BIT TESTS, just sets condition codes
+// Note that the C condition is set differently than when using CMP.
+let isCommutable = 1 in {
+def BIT8rr : I8rr<0b1011,
+ (outs), (ins GR8:$rd, GR8:$rs),
+ "bit.b\t$rs, $rd",
+ [(MSP430cmp (and_su GR8:$rd, GR8:$rs), 0),
(implicit SR)]>;
-def SBC16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "subc.w\t{$src, $dst}",
- [(store (sube (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
+def BIT16rr : I16rr<0b1011,
+ (outs), (ins GR16:$rd, GR16:$rs),
+ "bit\t$rs, $rd",
+ [(MSP430cmp (and_su GR16:$rd, GR16:$rs), 0),
(implicit SR)]>;
}
-
-} // Uses = [SR]
-
-// FIXME: memory variant!
-def SAR8r1 : II8r<0x0,
- (outs GR8:$dst), (ins GR8:$src),
- "rra.b\t$dst",
- [(set GR8:$dst, (MSP430rra GR8:$src)),
+def BIT8rc : I8rc<0b1011,
+ (outs), (ins GR8:$rd, cg8imm:$imm),
+ "bit.b\t$imm, $rd",
+ [(MSP430cmp (and_su GR8:$rd, cg8imm:$imm), 0),
(implicit SR)]>;
-def SAR16r1 : II16r<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "rra.w\t$dst",
- [(set GR16:$dst, (MSP430rra GR16:$src)),
+def BIT16rc : I16rc<0b1011,
+ (outs), (ins GR16:$rd, cg16imm:$imm),
+ "bit\t$imm, $rd",
+ [(MSP430cmp (and_su GR16:$rd, cg16imm:$imm), 0),
(implicit SR)]>;
-def SHL8r1 : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src),
- "rla.b\t$dst",
- [(set GR8:$dst, (MSP430rla GR8:$src)),
+def BIT8ri : I8ri<0b1011,
+ (outs), (ins GR8:$rd, i8imm:$imm),
+ "bit.b\t$imm, $rd",
+ [(MSP430cmp (and_su GR8:$rd, imm:$imm), 0),
(implicit SR)]>;
-def SHL16r1 : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "rla.w\t$dst",
- [(set GR16:$dst, (MSP430rla GR16:$src)),
- (implicit SR)]>;
-
-def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
- "clrc\n\t"
- "rrc.b\t$dst",
- [(set GR8:$dst, (MSP430rrc GR8:$src)),
- (implicit SR)]>;
-def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
- "clrc\n\t"
- "rrc.w\t$dst",
- [(set GR16:$dst, (MSP430rrc GR16:$src)),
- (implicit SR)]>;
-
-// FIXME: Memory sext's ?
-def SEXT16r : II16r<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "sxt\t$dst",
- [(set GR16:$dst, (sext_inreg GR16:$src, i8)),
+def BIT16ri : I16ri<0b1011,
+ (outs), (ins GR16:$rd, i16imm:$imm),
+ "bit\t$imm, $rd",
+ [(MSP430cmp (and_su GR16:$rd, imm:$imm), 0),
(implicit SR)]>;
-} // Defs = [SR]
-
-def ZEXT16r : I8rr<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "mov.b\t{$src, $dst}",
- [(set GR16:$dst, (zext (trunc GR16:$src)))]>;
-
-// FIXME: Memory bitswaps?
-def SWPB16r : II16r<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "swpb\t$dst",
- [(set GR16:$dst, (bswap GR16:$src))]>;
-
-} // Constraints = "$src = $dst"
-
-// Integer comparisons
-let Defs = [SR] in {
-def CMP8rr : I8rr<0x0,
- (outs), (ins GR8:$src, GR8:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, GR8:$src2), (implicit SR)]>;
-def CMP16rr : I16rr<0x0,
- (outs), (ins GR16:$src, GR16:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, GR16:$src2), (implicit SR)]>;
-
-def CMP8ri : I8ri<0x0,
- (outs), (ins GR8:$src, i8imm:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, imm:$src2), (implicit SR)]>;
-def CMP16ri : I16ri<0x0,
- (outs), (ins GR16:$src, i16imm:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, imm:$src2), (implicit SR)]>;
-
-def CMP8mi : I8mi<0x0,
- (outs), (ins memsrc:$src, i8imm:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp (load addr:$src),
- (i8 imm:$src2)), (implicit SR)]>;
-def CMP16mi : I16mi<0x0,
- (outs), (ins memsrc:$src, i16imm:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp (load addr:$src),
- (i16 imm:$src2)), (implicit SR)]>;
-
-def CMP8rm : I8rm<0x0,
- (outs), (ins GR8:$src, memsrc:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, (load addr:$src2)),
+def BIT8rm : I8rm<0b1011,
+ (outs), (ins GR8:$rd, memdst:$src),
+ "bit.b\t$src, $rd",
+ [(MSP430cmp (and_su GR8:$rd, (load addr:$src)), 0),
(implicit SR)]>;
-def CMP16rm : I16rm<0x0,
- (outs), (ins GR16:$src, memsrc:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, (load addr:$src2)),
+def BIT16rm : I16rm<0b1011,
+ (outs), (ins GR16:$rd, memdst:$src),
+ "bit\t$src, $rd",
+ [(MSP430cmp (and_su GR16:$rd, (load addr:$src)), 0),
(implicit SR)]>;
-def CMP8mr : I8mr<0x0,
- (outs), (ins memsrc:$src, GR8:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp (load addr:$src), GR8:$src2),
- (implicit SR)]>;
-def CMP16mr : I16mr<0x0,
- (outs), (ins memsrc:$src, GR16:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp (load addr:$src), GR16:$src2),
- (implicit SR)]>;
+def BIT8rn : I8rn<0b1011, (outs), (ins GR8:$rd, indreg:$rs),
+ "bit.b\t$rs, $rd", []>;
+def BIT16rn : I16rn<0b1011, (outs), (ins GR16:$rd, indreg:$rs),
+ "bit\t$rs, $rd", []>;
+def BIT8rp : I8rp<0b1011, (outs), (ins GR8:$rd, postreg:$rs),
+ "bit.b\t$rs, $rd", []>;
+def BIT16rp : I16rp<0b1011, (outs), (ins GR16:$rd, postreg:$rs),
+ "bit\t$rs, $rd", []>;
-// BIT TESTS, just sets condition codes
-// Note that the C condition is set differently than when using CMP.
-let isCommutable = 1 in {
-def BIT8rr : I8rr<0x0,
- (outs), (ins GR8:$src, GR8:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
- (implicit SR)]>;
-def BIT16rr : I16rr<0x0,
- (outs), (ins GR16:$src, GR16:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
- (implicit SR)]>;
-}
-def BIT8ri : I8ri<0x0,
- (outs), (ins GR8:$src, i8imm:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
- (implicit SR)]>;
-def BIT16ri : I16ri<0x0,
- (outs), (ins GR16:$src, i16imm:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
+def BIT8mr : I8mr<0b1011,
+ (outs), (ins memsrc:$dst, GR8:$rs),
+ "bit.b\t$rs, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), GR8:$rs), 0),
+ (implicit SR)]>;
+def BIT16mr : I16mr<0b1011,
+ (outs), (ins memsrc:$dst, GR16:$rs),
+ "bit\t$rs, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), GR16:$rs), 0),
(implicit SR)]>;
-def BIT8rm : I8rm<0x0,
- (outs), (ins GR8:$src, memdst:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
+def BIT8mc : I8mc<0b1011,
+ (outs), (ins memsrc:$dst, cg8imm:$imm),
+ "bit.b\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i8 cg8imm:$imm)), 0),
(implicit SR)]>;
-def BIT16rm : I16rm<0x0,
- (outs), (ins GR16:$src, memdst:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
+def BIT16mc : I16mc<0b1011,
+ (outs), (ins memdst:$dst, cg16imm:$imm),
+ "bit\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i16 cg16imm:$imm)), 0),
(implicit SR)]>;
-def BIT8mr : I8mr<0x0,
- (outs), (ins memsrc:$src, GR8:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
- (implicit SR)]>;
-def BIT16mr : I16mr<0x0,
- (outs), (ins memsrc:$src, GR16:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
- (implicit SR)]>;
-
-def BIT8mi : I8mi<0x0,
- (outs), (ins memsrc:$src, i8imm:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
+def BIT8mi : I8mi<0b1011,
+ (outs), (ins memsrc:$dst, i8imm:$imm),
+ "bit.b\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i8 imm:$imm)), 0),
(implicit SR)]>;
-def BIT16mi : I16mi<0x0,
- (outs), (ins memsrc:$src, i16imm:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
+def BIT16mi : I16mi<0b1011,
+ (outs), (ins memsrc:$dst, i16imm:$imm),
+ "bit\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i16 imm:$imm)), 0),
(implicit SR)]>;
-def BIT8mm : I8mm<0x0,
- (outs), (ins memsrc:$src, memsrc:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su (i8 (load addr:$src)),
- (load addr:$src2)),
+def BIT8mm : I8mm<0b1011,
+ (outs), (ins memsrc:$dst, memsrc:$src),
+ "bit.b\t$src, $dst",
+ [(MSP430cmp (and_su (i8 (load addr:$dst)),
+ (load addr:$src)),
0),
(implicit SR)]>;
-def BIT16mm : I16mm<0x0,
- (outs), (ins memsrc:$src, memsrc:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su (i16 (load addr:$src)),
- (load addr:$src2)),
+def BIT16mm : I16mm<0b1011,
+ (outs), (ins memsrc:$dst, memsrc:$src),
+ "bit\t$src, $dst",
+ [(MSP430cmp (and_su (i16 (load addr:$dst)),
+ (load addr:$src)),
0),
(implicit SR)]>;
+def BIT8mn : I8mn<0b1011, (outs), (ins memsrc:$dst, indreg:$rs),
+ "bit.b\t$rs, $dst", []>;
+def BIT16mn : I16mn<0b1011, (outs), (ins memsrc:$dst, indreg:$rs),
+ "bit\t$rs, $dst", []>;
+
+def BIT8mp : I8mp<0b1011, (outs), (ins memsrc:$dst, postreg:$rs),
+ "bit.b\t$rs, $dst", []>;
+def BIT16mp : I16mp<0b1011, (outs), (ins memsrc:$dst, postreg:$rs),
+ "bit\t$rs, $dst", []>;
+
} // Defs = [SR]
+def TST8r : InstAlias<"tst.b\t$dst", (CMP8rc GR8:$dst, 0)>;
+def TST16r : InstAlias<"tst\t$dst", (CMP16rc GR16:$dst, 0)>;
+def TST8m : InstAlias<"tst.b\t$dst", (CMP8mc memdst:$dst, 0)>;
+def TST16m : InstAlias<"tst\t$dst", (CMP16mc memdst:$dst, 0)>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
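The alias block earlier in this hunk maps the MSP430 emulated mnemonics (clr, inc/incd, dec/decd, inv, tst, adc, sbc, dadc, rla, rlc, dint, eint, setc/clrc and friends) onto real two-operand instructions whose constant operands come from the constant generator. As a minimal sketch, not LLVM code, these are the status-register bit masks the clrc/setc, clrz/setz, clrn/setn and dint/eint expansions rely on, assuming the standard MSP430 SR layout:

// Status-register bits implied by the BIC/BIS constants in the aliases above.
#include <cstdint>

constexpr uint16_t SR_C   = 0x0001; // carry     : clrc/setc -> bic/bis #1, sr
constexpr uint16_t SR_Z   = 0x0002; // zero      : clrz/setz -> bic/bis #2, sr
constexpr uint16_t SR_N   = 0x0004; // negative  : clrn/setn -> bic/bis #4, sr
constexpr uint16_t SR_GIE = 0x0008; // interrupt : dint/eint -> bic/bis #8, sr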
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
index e7716382b222..860c0006f782 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -110,6 +110,9 @@ LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
return MCOperand::createExpr(Expr);
}
+#define GET_REGINFO_ENUM
+#include "MSP430GenRegisterInfo.inc"
+
void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
index b5a6ed0f0a56..1e86bdf34a0b 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -11,26 +11,31 @@
// Declarations that describe the MSP430 register file
//===----------------------------------------------------------------------===//
-class MSP430Reg<bits<4> num, string n> : Register<n> {
+class MSP430Reg<bits<4> num, string n, list<string> alt = []> : Register<n> {
field bits<4> Num = num;
let Namespace = "MSP430";
+ let HWEncoding{3-0} = num;
+ let AltNames = alt;
}
-class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs>
+class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs,
+ list<string> alt = []>
: RegisterWithSubRegs<n, subregs> {
field bits<4> Num = num;
let Namespace = "MSP430";
+ let HWEncoding{3-0} = num;
+ let AltNames = alt;
}
//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//
-def PCB : MSP430Reg<0, "r0">;
-def SPB : MSP430Reg<1, "r1">;
-def SRB : MSP430Reg<2, "r2">;
-def CGB : MSP430Reg<3, "r3">;
-def FPB : MSP430Reg<4, "r4">;
+def PCB : MSP430Reg<0, "r0", ["pc"]>;
+def SPB : MSP430Reg<1, "r1", ["sp"]>;
+def SRB : MSP430Reg<2, "r2", ["sr"]>;
+def CGB : MSP430Reg<3, "r3", ["cg"]>;
+def FPB : MSP430Reg<4, "r4", ["fp"]>;
def R5B : MSP430Reg<5, "r5">;
def R6B : MSP430Reg<6, "r6">;
def R7B : MSP430Reg<7, "r7">;
@@ -46,11 +51,11 @@ def R15B : MSP430Reg<15, "r15">;
def subreg_8bit : SubRegIndex<8> { let Namespace = "MSP430"; }
let SubRegIndices = [subreg_8bit] in {
-def PC : MSP430RegWithSubregs<0, "r0", [PCB]>;
-def SP : MSP430RegWithSubregs<1, "r1", [SPB]>;
-def SR : MSP430RegWithSubregs<2, "r2", [SRB]>;
-def CG : MSP430RegWithSubregs<3, "r3", [CGB]>;
-def FP : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def PC : MSP430RegWithSubregs<0, "r0", [PCB], ["pc"]>;
+def SP : MSP430RegWithSubregs<1, "r1", [SPB], ["sp"]>;
+def SR : MSP430RegWithSubregs<2, "r2", [SRB], ["sr"]>;
+def CG : MSP430RegWithSubregs<3, "r3", [CGB], ["cg"]>;
+def FP : MSP430RegWithSubregs<4, "r4", [FPB], ["fp"]>;
def R5 : MSP430RegWithSubregs<5, "r5", [R5B]>;
def R6 : MSP430RegWithSubregs<6, "r6", [R6B]>;
def R7 : MSP430RegWithSubregs<7, "r7", [R7B]>;
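The register definitions above gain a 4-bit HWEncoding and alternate spellings so the assembler and disassembler can refer to the low registers by role as well as by number. As an illustrative table only (a hypothetical helper, not part of the patch), the accepted alternates are:

// Alternate register names introduced above, mapped to their canonical r-numbers.
#include <map>
#include <string>

static const std::map<std::string, std::string> AltToCanonical = {
    {"pc", "r0"}, {"sp", "r1"}, {"sr", "r2"}, {"cg", "r3"}, {"fp", "r4"}};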
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index 01f44e266d7b..9f6ebba75ec6 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -32,12 +32,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
return "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16";
@@ -51,7 +45,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
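The target machine above now passes CodeModel::Small as an explicit default to a shared getEffectiveCodeModel helper instead of keeping a local copy. A minimal sketch of what such a helper boils down to, using std::optional and a stand-in enum rather than the LLVM types:

// Sketch only: pick the explicit code model if one was given, else the default.
#include <optional>

enum class Model { Tiny, Small, Kernel, Medium, Large };

static Model effectiveCodeModel(std::optional<Model> CM, Model Default) {
  return CM ? *CM : Default; // e.g. effectiveCodeModel(CM, Model::Small)
}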
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index ce7db657f5e9..d2fed6861477 100644
--- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -64,6 +65,11 @@ class MCInstrInfo;
} // end namespace llvm
+static cl::opt<bool>
+EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
+ cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
+ cl::init(true));
+
namespace {
class MipsAssemblerOptions {
@@ -195,7 +201,6 @@ class MipsAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseImm(OperandVector &Operands);
OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
OperandMatchResultTy parseInvNum(OperandVector &Operands);
- OperandMatchResultTy parseMovePRegPair(OperandVector &Operands);
OperandMatchResultTy parseRegisterList(OperandVector &Operands);
bool searchSymbolAlias(OperandVector &Operands);
@@ -760,7 +765,6 @@ private:
k_RegisterIndex, /// A register index in one or more RegKind.
k_Token, /// A simple token
k_RegList, /// A physical register list
- k_RegPair /// A pair of physical register
} Kind;
public:
@@ -769,16 +773,15 @@ public:
~MipsOperand() override {
switch (Kind) {
- case k_Immediate:
- break;
case k_Memory:
delete Mem.Base;
break;
case k_RegList:
delete RegList.List;
+ break;
+ case k_Immediate:
case k_RegisterIndex:
case k_Token:
- case k_RegPair:
break;
}
}
@@ -1038,6 +1041,17 @@ public:
Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
}
+ void addGPRMM16AsmRegMovePPairFirstOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+ }
+
+ void addGPRMM16AsmRegMovePPairSecondOperands(MCInst &Inst,
+ unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+ }
+
/// Render the operand to an MCInst as a GPR64
/// Asserts if the wrong number of operands are requested, or the operand
/// is not a k_RegisterIndex compatible with RegKind_GPR
@@ -1217,29 +1231,6 @@ public:
Inst.addOperand(MCOperand::createReg(RegNo));
}
- void addRegPairOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && "Invalid number of operands!");
- assert((RegIdx.Kind & RegKind_GPR) && "Invalid access!");
- unsigned RegNo = getRegPair();
- AsmParser.warnIfRegIndexIsAT(RegNo, StartLoc);
- Inst.addOperand(MCOperand::createReg(
- RegIdx.RegInfo->getRegClass(
- AsmParser.getABI().AreGprs64bit()
- ? Mips::GPR64RegClassID
- : Mips::GPR32RegClassID).getRegister(RegNo++)));
- Inst.addOperand(MCOperand::createReg(
- RegIdx.RegInfo->getRegClass(
- AsmParser.getABI().AreGprs64bit()
- ? Mips::GPR64RegClassID
- : Mips::GPR32RegClassID).getRegister(RegNo)));
- }
-
- void addMovePRegPairOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && "Invalid number of operands!");
- for (auto RegNo : getRegList())
- Inst.addOperand(MCOperand::createReg(RegNo));
- }
-
bool isReg() const override {
// As a special case until we sort out the definition of div/divu, accept
// $0/$zero here so that MCK_ZERO works correctly.
@@ -1406,34 +1397,6 @@ public:
bool isRegList() const { return Kind == k_RegList; }
- bool isMovePRegPair() const {
- if (Kind != k_RegList || RegList.List->size() != 2)
- return false;
-
- unsigned R0 = RegList.List->front();
- unsigned R1 = RegList.List->back();
-
- if ((R0 == Mips::A1 && R1 == Mips::A2) ||
- (R0 == Mips::A1 && R1 == Mips::A3) ||
- (R0 == Mips::A2 && R1 == Mips::A3) ||
- (R0 == Mips::A0 && R1 == Mips::S5) ||
- (R0 == Mips::A0 && R1 == Mips::S6) ||
- (R0 == Mips::A0 && R1 == Mips::A1) ||
- (R0 == Mips::A0 && R1 == Mips::A2) ||
- (R0 == Mips::A0 && R1 == Mips::A3) ||
- (R0 == Mips::A1_64 && R1 == Mips::A2_64) ||
- (R0 == Mips::A1_64 && R1 == Mips::A3_64) ||
- (R0 == Mips::A2_64 && R1 == Mips::A3_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::S5_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::S6_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::A1_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::A2_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::A3_64))
- return true;
-
- return false;
- }
-
StringRef getToken() const {
assert(Kind == k_Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -1481,11 +1444,6 @@ public:
return *(RegList.List);
}
- unsigned getRegPair() const {
- assert((Kind == k_RegPair) && "Invalid access!");
- return RegIdx.Index;
- }
-
static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S,
MipsAsmParser &Parser) {
auto Op = llvm::make_unique<MipsOperand>(k_Token, Parser);
@@ -1593,18 +1551,6 @@ public:
return Op;
}
- static std::unique_ptr<MipsOperand> CreateRegPair(const MipsOperand &MOP,
- SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- auto Op = llvm::make_unique<MipsOperand>(k_RegPair, Parser);
- Op->RegIdx.Index = MOP.RegIdx.Index;
- Op->RegIdx.RegInfo = MOP.RegIdx.RegInfo;
- Op->RegIdx.Kind = MOP.RegIdx.Kind;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
bool isGPRZeroAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index == 0;
}
@@ -1640,6 +1586,19 @@ public:
(RegIdx.Index >= 16 && RegIdx.Index <= 20));
}
+ bool isMM16AsmRegMovePPairFirst() const {
+ if (!(isRegIdx() && RegIdx.Kind))
+ return false;
+ return RegIdx.Index >= 4 && RegIdx.Index <= 6;
+ }
+
+ bool isMM16AsmRegMovePPairSecond() const {
+ if (!(isRegIdx() && RegIdx.Kind))
+ return false;
+ return (RegIdx.Index == 21 || RegIdx.Index == 22 ||
+ (RegIdx.Index >= 5 && RegIdx.Index <= 7));
+ }
+
bool isFGRAsmReg() const {
// AFGR64 is $0-$15 but we handle this in getAFGR64()
return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
@@ -1720,9 +1679,6 @@ public:
OS << Reg << " ";
OS << ">";
break;
- case k_RegPair:
- OS << "RegPair<" << RegIdx.Index << "," << RegIdx.Index + 1 << ">";
- break;
}
}
@@ -1755,14 +1711,23 @@ static const MCInstrDesc &getInstDesc(unsigned Opcode) {
return MipsInsts[Opcode];
}
-static bool hasShortDelaySlot(unsigned Opcode) {
- switch (Opcode) {
+static bool hasShortDelaySlot(MCInst &Inst) {
+ switch (Inst.getOpcode()) {
+ case Mips::BEQ_MM:
+ case Mips::BNE_MM:
+ case Mips::BLTZ_MM:
+ case Mips::BGEZ_MM:
+ case Mips::BLEZ_MM:
+ case Mips::BGTZ_MM:
+ case Mips::JRC16_MM:
case Mips::JALS_MM:
case Mips::JALRS_MM:
case Mips::JALRS16_MM:
case Mips::BGEZALS_MM:
case Mips::BLTZALS_MM:
return true;
+ case Mips::J_MM:
+ return !Inst.getOperand(0).isReg();
default:
return false;
}
@@ -2115,9 +2080,21 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
JalrInst.addOperand(MCOperand::createReg(Mips::RA));
JalrInst.addOperand(MCOperand::createReg(Mips::T9));
- // FIXME: Add an R_(MICRO)MIPS_JALR relocation after the JALR.
- // This relocation is supposed to be an optimization hint for the linker
- // and is not necessary for correctness.
+ if (EmitJalrReloc) {
+ // As an optimization hint for the linker, before the JALR we add:
+ // .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol
+ // tmplabel:
+ MCSymbol *TmpLabel = getContext().createTempSymbol();
+ const MCExpr *TmpExpr = MCSymbolRefExpr::create(TmpLabel, getContext());
+ const MCExpr *RelocJalrExpr =
+ MCSymbolRefExpr::create(JalSym, MCSymbolRefExpr::VK_None,
+ getContext(), IDLoc);
+
+ TOut.getStreamer().EmitRelocDirective(*TmpExpr,
+ inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
+ RelocJalrExpr, IDLoc, *STI);
+ TOut.getStreamer().EmitLabel(TmpLabel);
+ }
Inst = JalrInst;
ExpandedJalSym = true;
@@ -2288,6 +2265,22 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (Inst.getOperand(0).getReg() == Mips::RA)
return Error(IDLoc, "invalid operand for instruction");
break;
+ case Mips::MOVEP_MM:
+ case Mips::MOVEP_MMR6: {
+ unsigned R0 = Inst.getOperand(0).getReg();
+ unsigned R1 = Inst.getOperand(1).getReg();
+ bool RegPair = ((R0 == Mips::A1 && R1 == Mips::A2) ||
+ (R0 == Mips::A1 && R1 == Mips::A3) ||
+ (R0 == Mips::A2 && R1 == Mips::A3) ||
+ (R0 == Mips::A0 && R1 == Mips::S5) ||
+ (R0 == Mips::A0 && R1 == Mips::S6) ||
+ (R0 == Mips::A0 && R1 == Mips::A1) ||
+ (R0 == Mips::A0 && R1 == Mips::A2) ||
+ (R0 == Mips::A0 && R1 == Mips::A3));
+ if (!RegPair)
+ return Error(IDLoc, "invalid operand for instruction");
+ break;
+ }
}
}
@@ -2318,7 +2311,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// If this instruction has a delay slot and .set reorder is active,
// emit a NOP after it.
if (FillDelaySlot) {
- TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst.getOpcode()), IDLoc, STI);
+ TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst), IDLoc, STI);
TOut.emitDirectiveSetReorder();
}
@@ -2330,7 +2323,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// If .set reorder has been used, we've already emitted a NOP.
// If .set noreorder has been used, we need to emit a NOP at this point.
if (!AssemblerOptions.back()->isReorder())
- TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst.getOpcode()), IDLoc,
+ TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst), IDLoc,
STI);
// Load the $gp from the stack.
@@ -2617,7 +2610,7 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
// emit a NOP after it.
const MCInstrDesc &MCID = getInstDesc(JalrInst.getOpcode());
if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder())
- TOut.emitEmptyDelaySlot(hasShortDelaySlot(JalrInst.getOpcode()), IDLoc,
+ TOut.emitEmptyDelaySlot(hasShortDelaySlot(JalrInst), IDLoc,
STI);
return false;
@@ -6278,45 +6271,6 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
return MatchOperand_Success;
}
-OperandMatchResultTy
-MipsAsmParser::parseMovePRegPair(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> TmpOperands;
- SmallVector<unsigned, 10> Regs;
-
- if (Parser.getTok().isNot(AsmToken::Dollar))
- return MatchOperand_ParseFail;
-
- SMLoc S = Parser.getTok().getLoc();
-
- if (parseAnyRegister(TmpOperands) != MatchOperand_Success)
- return MatchOperand_ParseFail;
-
- MipsOperand *Reg = &static_cast<MipsOperand &>(*TmpOperands.back());
- unsigned RegNo = isGP64bit() ? Reg->getGPR64Reg() : Reg->getGPR32Reg();
- Regs.push_back(RegNo);
-
- SMLoc E = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(E, "',' expected");
- return MatchOperand_ParseFail;
- }
-
- // Remove comma.
- Parser.Lex();
-
- if (parseAnyRegister(TmpOperands) != MatchOperand_Success)
- return MatchOperand_ParseFail;
-
- Reg = &static_cast<MipsOperand &>(*TmpOperands.back());
- RegNo = isGP64bit() ? Reg->getGPR64Reg() : Reg->getGPR32Reg();
- Regs.push_back(RegNo);
-
- Operands.push_back(MipsOperand::CreateRegList(Regs, S, E, *this));
-
- return MatchOperand_Success;
-}
-
/// Sometimes (i.e. load/stores) the operand may be followed immediately by
/// either this.
/// ::= '(', register, ')'
@@ -6371,6 +6325,9 @@ bool MipsAsmParser::parseBracketSuffix(StringRef Name,
return false;
}
+static std::string MipsMnemonicSpellCheck(StringRef S, uint64_t FBS,
+ unsigned VariantID = 0);
+
bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
MCAsmParser &Parser = getParser();
@@ -6381,7 +6338,9 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Check if we have valid mnemonic
if (!mnemonicIsValid(Name, 0)) {
- return Error(NameLoc, "unknown instruction");
+ uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ std::string Suggestion = MipsMnemonicSpellCheck(Name, FBS);
+ return Error(NameLoc, "unknown instruction" + Suggestion);
}
// First operand in MCInst is instruction mnemonic.
Operands.push_back(MipsOperand::CreateToken(Name, NameLoc, *this));
@@ -8257,6 +8216,7 @@ extern "C" void LLVMInitializeMipsAsmParser() {
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
+#define GET_MNEMONIC_SPELL_CHECKER
#include "MipsGenAsmMatcher.inc"
bool MipsAsmParser::mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) {
diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index b94afb9520e3..27b27ff1e1e2 100644
--- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -538,6 +538,9 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
namespace llvm {
Target &getTheMipselTarget();
@@ -2450,6 +2453,32 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned RegPair = fieldFromInstruction(Insn, 7, 3);
+ if (DecodeMovePRegPair(Inst, RegPair, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ unsigned RegRs;
+ if (static_cast<const MipsDisassembler*>(Decoder)->hasMips32r6())
+ RegRs = fieldFromInstruction(Insn, 0, 2) |
+ (fieldFromInstruction(Insn, 3, 1) << 2);
+ else
+ RegRs = fieldFromInstruction(Insn, 1, 3);
+ if (DecodeGPRMM16MovePRegisterClass(Inst, RegRs, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ unsigned RegRt = fieldFromInstruction(Insn, 4, 3);
+ if (DecodeGPRMM16MovePRegisterClass(Inst, RegRt, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
uint64_t Address, const void *Decoder) {
switch (RegPair) {
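DecodeMovePOperands above reassembles the rs field differently depending on the ISA: on microMIPS R6 it is split across bits [1:0] and bit 3, otherwise it sits in bits [3:1]. A small standalone sketch of that extraction, using my own helper names that mirror fieldFromInstruction:

// Sketch of the bit-field math used by DecodeMovePOperands.
#include <cstdint>

static unsigned field(uint32_t Insn, unsigned Start, unsigned Size) {
  return (Insn >> Start) & ((1u << Size) - 1);
}

static unsigned decodeMovePRs(uint32_t Insn, bool HasMips32r6) {
  if (HasMips32r6)
    return field(Insn, 0, 2) | (field(Insn, 3, 1) << 2); // bits 1:0 plus bit 3
  return field(Insn, 1, 3);                              // bits 3:1
}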
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index bf1390880281..18d7dd99be34 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -55,6 +55,8 @@ MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
return MipsABIInfo::N32();
if (Options.getABIName().startswith("n64"))
return MipsABIInfo::N64();
+ if (TT.getEnvironment() == llvm::Triple::GNUABIN32)
+ return MipsABIInfo::N32();
assert(Options.getABIName().empty() && "Unknown ABI option for MIPS");
if (TT.isMIPS64())
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 3b1b94acb149..7d528fe3eab1 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -13,6 +13,7 @@
//
#include "MCTargetDesc/MipsAsmBackend.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
@@ -339,6 +340,8 @@ Optional<MCFixupKind> MipsAsmBackend::getFixupKind(StringRef Name) const {
(MCFixupKind)Mips::fixup_MICROMIPS_TLS_TPREL_HI16)
.Case("R_MICROMIPS_TLS_TPREL_LO16",
(MCFixupKind)Mips::fixup_MICROMIPS_TLS_TPREL_LO16)
+ .Case("R_MIPS_JALR", (MCFixupKind)Mips::fixup_Mips_JALR)
+ .Case("R_MICROMIPS_JALR", (MCFixupKind)Mips::fixup_MICROMIPS_JALR)
.Default(MCAsmBackend::getFixupKind(Name));
}
@@ -417,7 +420,9 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_TLS_TPREL_HI16", 0, 16, 0 },
{ "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 },
{ "fixup_Mips_SUB", 0, 64, 0 },
- { "fixup_MICROMIPS_SUB", 0, 64, 0 }
+ { "fixup_MICROMIPS_SUB", 0, 64, 0 },
+ { "fixup_Mips_JALR", 0, 32, 0 },
+ { "fixup_MICROMIPS_JALR", 0, 32, 0 }
};
static_assert(array_lengthof(LittleEndianInfos) == Mips::NumTargetFixupKinds,
"Not all MIPS little endian fixup kinds added!");
@@ -495,7 +500,9 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_TLS_TPREL_HI16", 16, 16, 0 },
{ "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 },
{ "fixup_Mips_SUB", 0, 64, 0 },
- { "fixup_MICROMIPS_SUB", 0, 64, 0 }
+ { "fixup_MICROMIPS_SUB", 0, 64, 0 },
+ { "fixup_Mips_JALR", 0, 32, 0 },
+ { "fixup_MICROMIPS_JALR", 0, 32, 0 }
};
static_assert(array_lengthof(BigEndianInfos) == Mips::NumTargetFixupKinds,
"Not all MIPS big endian fixup kinds added!");
@@ -553,6 +560,7 @@ bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
case Mips::fixup_Mips_TLSLDM:
case Mips::fixup_Mips_TPREL_HI:
case Mips::fixup_Mips_TPREL_LO:
+ case Mips::fixup_Mips_JALR:
case Mips::fixup_MICROMIPS_CALL16:
case Mips::fixup_MICROMIPS_GOT_DISP:
case Mips::fixup_MICROMIPS_GOT_PAGE:
@@ -565,6 +573,7 @@ bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
case Mips::fixup_MICROMIPS_TLS_LDM:
case Mips::fixup_MICROMIPS_TLS_TPREL_HI16:
case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
+ case Mips::fixup_MICROMIPS_JALR:
return true;
}
}
@@ -581,6 +590,6 @@ MCAsmBackend *llvm::createMipsAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
- return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(),
- Options.ABIName == "n32");
+ MipsABIInfo ABI = MipsABIInfo::computeTargetABI(STI.getTargetTriple(), STI.getCPU(), Options);
+ return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), ABI.IsN32());
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 3dc753772e5f..8ace2895d681 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
@@ -225,7 +226,9 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
case Mips::fixup_Mips_NONE:
return ELF::R_MIPS_NONE;
case FK_Data_1:
- report_fatal_error("MIPS does not support one byte relocations");
+ Ctx.reportError(Fixup.getLoc(),
+ "MIPS does not support one byte relocations");
+ return ELF::R_MIPS_NONE;
case Mips::fixup_Mips_16:
case FK_Data_2:
return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16;
@@ -236,6 +239,10 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsPCRel) {
switch (Kind) {
+ case FK_Data_8:
+ Ctx.reportError(Fixup.getLoc(),
+ "MIPS does not support 64-bit PC-relative relocations");
+ return ELF::R_MIPS_NONE;
case Mips::fixup_Mips_Branch_PCRel:
case Mips::fixup_Mips_PC16:
return ELF::R_MIPS_PC16;
@@ -401,6 +408,10 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_MICROMIPS_HIGHER;
case Mips::fixup_MICROMIPS_HIGHEST:
return ELF::R_MICROMIPS_HIGHEST;
+ case Mips::fixup_Mips_JALR:
+ return ELF::R_MIPS_JALR;
+ case Mips::fixup_MICROMIPS_JALR:
+ return ELF::R_MICROMIPS_JALR;
}
llvm_unreachable("invalid fixup kind!");
@@ -453,7 +464,7 @@ void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
return;
// Sort relocations by the address they are applied to.
- llvm::sort(Relocs.begin(), Relocs.end(),
+ llvm::sort(Relocs,
[](const ELFRelocationEntry &A, const ELFRelocationEntry &B) {
return A.Offset < B.Offset;
});
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index d7f6cf91db73..eedad16dddc3 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -222,6 +222,10 @@ namespace Mips {
fixup_Mips_SUB,
fixup_MICROMIPS_SUB,
+ // resulting in - R_MIPS_JALR/R_MICROMIPS_JALR
+ fixup_Mips_JALR,
+ fixup_MICROMIPS_JALR,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index f498d830c8f0..1506b4a83649 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -21,9 +21,8 @@ void MipsMCAsmInfo::anchor() { }
MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
IsLittleEndian = TheTriple.isLittleEndian();
- if (TheTriple.isMIPS64()) {
+ if (TheTriple.isMIPS64() && TheTriple.getEnvironment() != Triple::GNUABIN32)
CodePointerSize = CalleeSaveStackSlotSize = 8;
- }
// FIXME: This condition isn't quite right but it's the best we can do until
// this object can identify the ABI. It will misbehave when using O32
@@ -50,21 +49,5 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
ExceptionsType = ExceptionHandling::DwarfCFI;
DwarfRegNumForCFI = true;
HasMipsExpressions = true;
-
- // Enable IAS by default for O32.
- if (TheTriple.isMIPS32())
- UseIntegratedAssembler = true;
-
- // Enable IAS by default for Debian mips64/mips64el.
- if (TheTriple.getEnvironment() == Triple::GNUABI64)
- UseIntegratedAssembler = true;
-
- // Enable IAS by default for Android mips64el that uses N64 ABI.
- if (TheTriple.getArch() == Triple::mips64el && TheTriple.isAndroid())
- UseIntegratedAssembler = true;
-
- // Enable IAS by default for FreeBSD / OpenBSD mips64/mips64el.
- if (TheTriple.isOSFreeBSD() ||
- TheTriple.isOSOpenBSD())
- UseIntegratedAssembler = true;
+ UseIntegratedAssembler = true;
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index cd34b0ab70b4..f43a4d980f92 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -213,6 +213,12 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
TmpInst.setOpcode (NewOpcode);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
}
+
+ if (((MI.getOpcode() == Mips::MOVEP_MM) ||
+ (MI.getOpcode() == Mips::MOVEP_MMR6))) {
+ unsigned RegPair = getMovePRegPairOpValue(MI, 0, Fixups, STI);
+ Binary = (Binary & 0xFFFFFC7F) | (RegPair << 7);
+ }
}
const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
@@ -607,6 +613,9 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
case MipsMCExpr::MEK_Special:
llvm_unreachable("Unhandled fixup kind!");
break;
+ case MipsMCExpr::MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+ break;
case MipsMCExpr::MEK_CALL_HI16:
FixupKind = Mips::fixup_Mips_CALL_HI16;
break;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 0bddba781453..99857e083c6c 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -43,6 +43,9 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
break;
+ case MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+ break;
case MEK_CALL_HI16:
OS << "%call_hi";
break;
@@ -157,6 +160,8 @@ MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
case MEK_None:
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
+ case MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
case MEK_DTPREL_HI:
case MEK_DTPREL_LO:
case MEK_GOT:
@@ -244,6 +249,9 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
break;
+ case MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+ break;
case MEK_CALL_HI16:
case MEK_CALL_LO16:
case MEK_GOT:
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index 495d525ccff4..bf3274ab5d17 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -22,6 +22,7 @@ public:
MEK_None,
MEK_CALL_HI16,
MEK_CALL_LO16,
+ MEK_DTPREL,
MEK_DTPREL_HI,
MEK_DTPREL_LO,
MEK_GOT,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index ce208b7f98bc..a8cd7b0d9b03 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -47,10 +47,17 @@ using namespace llvm;
/// FIXME: Merge with the copy in MipsSubtarget.cpp
StringRef MIPS_MC::selectMipsCPU(const Triple &TT, StringRef CPU) {
if (CPU.empty() || CPU == "generic") {
- if (TT.isMIPS32())
- CPU = "mips32";
- else
- CPU = "mips64";
+ if (TT.getSubArch() == llvm::Triple::MipsSubArch_r6) {
+ if (TT.isMIPS32())
+ CPU = "mips32r6";
+ else
+ CPU = "mips64r6";
+ } else {
+ if (TT.isMIPS32())
+ CPU = "mips32";
+ else
+ CPU = "mips64";
+ }
}
return CPU;
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 1eb21b6cc826..58f9717e1cc6 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -248,7 +248,11 @@ void MipsTargetStreamer::emitEmptyDelaySlot(bool hasShortDelaySlot, SMLoc IDLoc,
}
void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {
- emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
+ const FeatureBitset &Features = STI->getFeatureBits();
+ if (Features[Mips::FeatureMicroMips])
+ emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, STI);
+ else
+ emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
}
/// Emit the $gp restore operation for .cprestore.
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 6b0aa7756eab..814918d25e70 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -159,6 +159,7 @@ class SYNC_MMR6_ENC : POOL32A_SYNC_FM_MMR6;
class SYNCI_MMR6_ENC : POOL32I_SYNCI_FM_MMR6, MMR6Arch<"synci">;
class RDPGPR_MMR6_ENC : POOL32A_RDPGPR_FM_MMR6<0b1110000101>;
class SDBBP_MMR6_ENC : SDBBP_FM_MM, MMR6Arch<"sdbbp">;
+class SIGRIE_MMR6_ENC : SIGRIE_FM_MM, MMR6Arch<"sigrie">;
class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>;
class XORI_MMR6_ENC : ADDI_FM_MMR6<"xori", 0x1c>;
class ABS_S_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.s", 0, 0b0001101>;
@@ -1101,7 +1102,9 @@ class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16", II_BREAK>, MMR6Arch<"break16">
class LI16_MMR6_DESC : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>,
MMR6Arch<"li16">, IsAsCheapAsAMove;
class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"move16">;
-class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMoveP>, MMR6Arch<"movep">;
+class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMovePPairFirst,
+ GPRMM16OpndMovePPairSecond, GPRMM16OpndMoveP>,
+ MMR6Arch<"movep">;
class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, MMR6Arch<"sdbbp16">;
class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
MMR6Arch<"subu16"> {
@@ -1160,6 +1163,14 @@ class SDBBP_MMR6_DESC : MipsR6Inst {
InstrItinClass Itinerary = II_SDBBP;
}
+class SIGRIE_MMR6_DESC : MipsR6Inst {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins uimm16:$code_);
+ string AsmString = !strconcat("sigrie", "\t$code_");
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_SIGRIE;
+}
+
class LWM16_MMR6_DESC
: MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
!strconcat("lwm16", "\t$rt, $addr"), [],
@@ -1425,6 +1436,7 @@ def SYNCI_MMR6 : StdMMR6Rel, SYNCI_MMR6_DESC, SYNCI_MMR6_ENC, ISA_MICROMIPS32R6;
def RDPGPR_MMR6 : R6MMR6Rel, RDPGPR_MMR6_DESC, RDPGPR_MMR6_ENC,
ISA_MICROMIPS32R6;
def SDBBP_MMR6 : R6MMR6Rel, SDBBP_MMR6_DESC, SDBBP_MMR6_ENC, ISA_MICROMIPS32R6;
+def SIGRIE_MMR6 : R6MMR6Rel, SIGRIE_MMR6_DESC, SIGRIE_MMR6_ENC, ISA_MICROMIPS32R6;
def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6;
def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
let DecoderMethod = "DecodeMemMMImm16" in {
@@ -1633,6 +1645,7 @@ def B_MMR6_Pseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset),
}
def : MipsInstAlias<"sync", (SYNC_MMR6 0), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"sdbbp", (SDBBP_MMR6 0), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"sigrie", (SIGRIE_MMR6 0), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"rdhwr $rt, $rs",
(RDHWR_MMR6 GPR32Opnd:$rt, HWRegsOpnd:$rs, 0), 1>,
ISA_MICROMIPS32R6;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index 84ae0eddf980..1731afc1961f 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -243,6 +243,8 @@ let DecoderNamespace = "MicroMipsFP64" in {
MFC1_FM_MM<0xe0>, ISA_MICROMIPS, FGR_64;
def MFHC1_D64_MM : MFC1_FT<"mfhc1", GPR32Opnd, FGR64Opnd, II_MFHC1>,
MFC1_FM_MM<0xc0>, ISA_MICROMIPS, FGR_64;
+ def MTC1_D64_MM : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>,
+ MFC1_FM_MM<0xa0>, ISA_MICROMIPS, FGR_64;
}
let DecoderNamespace = "MicroMips" in {
@@ -405,6 +407,9 @@ let AddedComplexity = 40 in {
def : StoreRegImmPat<SWC1_MM, f32>, ISA_MICROMIPS;
}
+def : MipsPat<(MipsMTC1_D64 GPR32Opnd:$src),
+ (MTC1_D64_MM GPR32Opnd:$src)>, ISA_MICROMIPS, FGR_64;
+
def : MipsPat<(f32 fpimm0), (MTC1_MM ZERO)>, ISA_MICROMIPS32_NOT_MIPS32R6;
def : MipsPat<(f32 fpimm0neg), (FNEG_S_MM (MTC1_MM ZERO))>,
ISA_MICROMIPS32_NOT_MIPS32R6;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
index a9c53e08b810..2a4cc279ef0d 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -933,6 +933,17 @@ class SDBBP_FM_MM : MMArch {
let Inst{5-0} = 0x3c;
}
+class SIGRIE_FM_MM : MMArch {
+ bits<16> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-22} = 0x0;
+ let Inst{21-6} = code_;
+ let Inst{5-0} = 0b111111;
+}
+
class RDHWR_FM_MM : MMArch {
bits<5> rt;
bits<5> rd;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index ebadb59a0432..af380a0ec71e 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -231,27 +231,14 @@ class StoreLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
bit mayStore = 1;
}
-/// A register pair used by movep instruction.
-def MovePRegPairAsmOperand : AsmOperandClass {
- let Name = "MovePRegPair";
- let ParserMethod = "parseMovePRegPair";
- let PredicateMethod = "isMovePRegPair";
-}
-
-def movep_regpair : Operand<i32> {
- let EncoderMethod = "getMovePRegPairOpValue";
- let ParserMatchClass = MovePRegPairAsmOperand;
- let PrintMethod = "printRegisterList";
- let DecoderMethod = "DecodeMovePRegPair";
- let MIOperandInfo = (ops ptr_rc, ptr_rc);
-}
-
-class MovePMM16<string opstr, RegisterOperand RO> :
-MicroMipsInst16<(outs movep_regpair:$dst_regs), (ins RO:$rs, RO:$rt),
- !strconcat(opstr, "\t$dst_regs, $rs, $rt"), [],
+class MovePMM16<string opstr, RegisterOperand RO1, RegisterOperand RO2,
+ RegisterOperand RO3> :
+MicroMipsInst16<(outs RO1:$rd1, RO2:$rd2), (ins RO3:$rs, RO3:$rt),
+ !strconcat(opstr, "\t$rd1, $rd2, $rs, $rt"), [],
NoItinerary, FrmR> {
let isReMaterializable = 1;
let isMoveReg = 1;
+ let DecoderMethod = "DecodeMovePOperands";
}
class StorePairMM<string opstr, ComplexPattern Addr = addr>
@@ -682,8 +669,9 @@ def MFLO16_MM : MoveFromHILOMM<"mflo16", GPR32Opnd, AC0>,
MFHILO_FM_MM16<0x12>, ISA_MICROMIPS32_NOT_MIPS32R6;
def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>,
ISA_MICROMIPS32_NOT_MIPS32R6;
-def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16,
- ISA_MICROMIPS32_NOT_MIPS32R6;
+def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMovePPairFirst,
+ GPRMM16OpndMovePPairSecond, GPRMM16OpndMoveP>,
+ MOVEP_FM_MM16, ISA_MICROMIPS32_NOT_MIPS32R6;
def LI16_MM : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>, LI_FM_MM16,
IsAsCheapAsAMove, ISA_MICROMIPS32_NOT_MIPS32R6;
def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>,
@@ -1116,6 +1104,27 @@ let DecoderNamespace = "MicroMips" in {
ISA_MICROMIPS32_NOT_MIPS32R6;
}
+let AdditionalPredicates = [NotDSP] in {
+ def PseudoMULT_MM : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, II_MULT>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMULTu_MM : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, II_MULTU>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMFHI_MM : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMFLO_MM : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMTLOHI_MM : PseudoMTLOHI<ACC64, GPR32>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMADD_MM : MAddSubPseudo<MADD, MipsMAdd, II_MADD>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMADDU_MM : MAddSubPseudo<MADDU, MipsMAddu, II_MADDU>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMSUB_MM : MAddSubPseudo<MSUB, MipsMSub, II_MSUB>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMSUBU_MM : MAddSubPseudo<MSUBU, MipsMSubu, II_MSUBU>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+}
+
def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6;
def TAILCALLREG_MM : TailCallReg<JRC16_MM, GPR32Opnd>,
@@ -1262,6 +1271,8 @@ let AddedComplexity = 40 in
def : MipsPat<(bswap GPR32:$rt), (ROTR_MM (WSBH_MM GPR32:$rt), 16)>,
ISA_MICROMIPS;
+def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL_MM texternalsym:$dst)>, ISA_MICROMIPS32_NOT_MIPS32R6;
def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
(TAILCALL_MM tglobaladdr:$dst)>, ISA_MICROMIPS32_NOT_MIPS32R6;
def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp b/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
index 568cdfb5b110..f9062cc23da2 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -31,13 +31,14 @@ namespace {
/// Order of operands to transfer
// TODO: Will be extended when additional optimizations are added
enum OperandTransfer {
- OT_NA, ///< Not applicable
- OT_OperandsAll, ///< Transfer all operands
- OT_Operands02, ///< Transfer operands 0 and 2
- OT_Operand2, ///< Transfer just operand 2
- OT_OperandsXOR, ///< Transfer operands for XOR16
- OT_OperandsLwp, ///< Transfer operands for LWP
- OT_OperandsSwp, ///< Transfer operands for SWP
+ OT_NA, ///< Not applicable
+ OT_OperandsAll, ///< Transfer all operands
+ OT_Operands02, ///< Transfer operands 0 and 2
+ OT_Operand2, ///< Transfer just operand 2
+ OT_OperandsXOR, ///< Transfer operands for XOR16
+ OT_OperandsLwp, ///< Transfer operands for LWP
+ OT_OperandsSwp, ///< Transfer operands for SWP
+ OT_OperandsMovep, ///< Transfer operands for MOVEP
};
/// Reduction type
@@ -170,6 +171,10 @@ private:
// returns true on success.
static bool ReduceSXtoSX16(ReduceEntryFunArgs *Arguments);
+ // Attempts to reduce two MOVE instructions into a MOVEP instruction;
+ // returns true on success.
+ static bool ReduceMoveToMovep(ReduceEntryFunArgs *Arguments);
+
// Attempts to reduce arithmetic instructions, returns true on success.
static bool ReduceArithmeticInstructions(ReduceEntryFunArgs *Arguments);
@@ -243,6 +248,8 @@ ReduceEntryVector MicroMipsSizeReduce::ReduceTable = {
OpInfo(OT_OperandsLwp), ImmField(0, -2048, 2048, 2)},
{RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP,
OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+ {RT_TwoInstr, OpCodes(Mips::MOVE16_MM, Mips::MOVEP_MM), ReduceMoveToMovep,
+ OpInfo(OT_OperandsMovep), ImmField(0, 0, 0, -1)},
{RT_OneInstr, OpCodes(Mips::SB, Mips::SB16_MM), ReduceSXtoSX16,
OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)},
{RT_OneInstr, OpCodes(Mips::SB_MM, Mips::SB16_MM), ReduceSXtoSX16,
@@ -562,6 +569,89 @@ bool MicroMipsSizeReduce::ReduceSXtoSX16(ReduceEntryFunArgs *Arguments) {
return ReplaceInstruction(MI, Entry);
}
+// Returns true if Reg can be a source register
+// of the MOVEP instruction.
+static bool IsMovepSrcRegister(unsigned Reg) {
+
+ if (Reg == Mips::ZERO || Reg == Mips::V0 || Reg == Mips::V1 ||
+ Reg == Mips::S0 || Reg == Mips::S1 || Reg == Mips::S2 ||
+ Reg == Mips::S3 || Reg == Mips::S4)
+ return true;
+
+ return false;
+}
+
+// Returns true if Reg can be a destination register
+// of the MOVEP instruction.
+static bool IsMovepDestinationReg(unsigned Reg) {
+
+ if (Reg == Mips::A0 || Reg == Mips::A1 || Reg == Mips::A2 ||
+ Reg == Mips::A3 || Reg == Mips::S5 || Reg == Mips::S6)
+ return true;
+
+ return false;
+}
+
+// Returns true if the registers can be a pair of destination
+// registers in the MOVEP instruction.
+static bool IsMovepDestinationRegPair(unsigned R0, unsigned R1) {
+
+ if ((R0 == Mips::A0 && R1 == Mips::S5) ||
+ (R0 == Mips::A0 && R1 == Mips::S6) ||
+ (R0 == Mips::A0 && R1 == Mips::A1) ||
+ (R0 == Mips::A0 && R1 == Mips::A2) ||
+ (R0 == Mips::A0 && R1 == Mips::A3) ||
+ (R0 == Mips::A1 && R1 == Mips::A2) ||
+ (R0 == Mips::A1 && R1 == Mips::A3) ||
+ (R0 == Mips::A2 && R1 == Mips::A3))
+ return true;
+
+ return false;
+}
+
+bool MicroMipsSizeReduce::ReduceMoveToMovep(ReduceEntryFunArgs *Arguments) {
+
+ const ReduceEntry &Entry = Arguments->Entry;
+ MachineBasicBlock::instr_iterator &NextMII = Arguments->NextMII;
+ const MachineBasicBlock::instr_iterator &E =
+ Arguments->MI->getParent()->instr_end();
+
+ if (NextMII == E)
+ return false;
+
+ MachineInstr *MI1 = Arguments->MI;
+ MachineInstr *MI2 = &*NextMII;
+
+ unsigned RegDstMI1 = MI1->getOperand(0).getReg();
+ unsigned RegSrcMI1 = MI1->getOperand(1).getReg();
+
+ if (!IsMovepSrcRegister(RegSrcMI1))
+ return false;
+
+ if (!IsMovepDestinationReg(RegDstMI1))
+ return false;
+
+ if (MI2->getOpcode() != Entry.WideOpc())
+ return false;
+
+ unsigned RegDstMI2 = MI2->getOperand(0).getReg();
+ unsigned RegSrcMI2 = MI2->getOperand(1).getReg();
+
+ if (!IsMovepSrcRegister(RegSrcMI2))
+ return false;
+
+ bool ConsecutiveForward;
+ if (IsMovepDestinationRegPair(RegDstMI1, RegDstMI2)) {
+ ConsecutiveForward = true;
+ } else if (IsMovepDestinationRegPair(RegDstMI2, RegDstMI1)) {
+ ConsecutiveForward = false;
+ } else
+ return false;
+
+ NextMII = std::next(NextMII);
+ return ReplaceInstruction(MI1, Entry, MI2, ConsecutiveForward);
+}
+
bool MicroMipsSizeReduce::ReduceXORtoXOR16(ReduceEntryFunArgs *Arguments) {
MachineInstr *MI = Arguments->MI;
@@ -641,18 +731,25 @@ bool MicroMipsSizeReduce::ReplaceInstruction(MachineInstr *MI,
}
break;
}
+ case OT_OperandsMovep:
case OT_OperandsLwp:
case OT_OperandsSwp: {
if (ConsecutiveForward) {
MIB.add(MI->getOperand(0));
MIB.add(MI2->getOperand(0));
MIB.add(MI->getOperand(1));
- MIB.add(MI->getOperand(2));
+ if (OpTransfer == OT_OperandsMovep)
+ MIB.add(MI2->getOperand(1));
+ else
+ MIB.add(MI->getOperand(2));
} else { // consecutive backward
MIB.add(MI2->getOperand(0));
MIB.add(MI->getOperand(0));
MIB.add(MI2->getOperand(1));
- MIB.add(MI2->getOperand(2));
+ if (OpTransfer == OT_OperandsMovep)
+ MIB.add(MI->getOperand(1));
+ else
+ MIB.add(MI2->getOperand(2));
}
LLVM_DEBUG(dbgs() << "and converting 32-bit: " << *MI2
diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h
index ef3a807c7648..6bb7aecc867a 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.h
+++ b/contrib/llvm/lib/Target/Mips/Mips.h
@@ -38,6 +38,7 @@ namespace llvm {
FunctionPass *createMipsConstantIslandPass();
FunctionPass *createMicroMipsSizeReducePass();
FunctionPass *createMipsExpandPseudoPass();
+ FunctionPass *createMipsPreLegalizeCombiner();
InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &,
MipsSubtarget &,
@@ -46,6 +47,7 @@ namespace llvm {
void initializeMipsDelaySlotFillerPass(PassRegistry &);
void initializeMipsBranchExpansionPass(PassRegistry &);
void initializeMicroMipsSizeReducePass(PassRegistry &);
+ void initializeMipsPreLegalizerCombinerPass(PassRegistry&);
} // end namespace llvm;
#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index c310d9491af8..f237bb6d4006 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -74,16 +74,18 @@ static FPReturnVariant whichFPReturnVariant(Type *T) {
return FRet;
case Type::DoubleTyID:
return DRet;
- case Type::StructTyID:
- if (T->getStructNumElements() != 2)
+ case Type::StructTyID: {
+ StructType *ST = cast<StructType>(T);
+ if (ST->getNumElements() != 2)
break;
- if ((T->getContainedType(0)->isFloatTy()) &&
- (T->getContainedType(1)->isFloatTy()))
+ if ((ST->getElementType(0)->isFloatTy()) &&
+ (ST->getElementType(1)->isFloatTy()))
return CFRet;
- if ((T->getContainedType(0)->isDoubleTy()) &&
- (T->getContainedType(1)->isDoubleTy()))
+ if ((ST->getElementType(0)->isDoubleTy()) &&
+ (ST->getElementType(1)->isDoubleTy()))
return CDRet;
break;
+ }
default:
break;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 8ce47e3f669d..79df622241a0 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -386,27 +386,22 @@ const char* Mips16TargetLowering::
}
else if (RetTy ->isDoubleTy()) {
result = dfMips16Helper[stubNum];
- }
- else if (RetTy->isStructTy()) {
+ } else if (StructType *SRetTy = dyn_cast<StructType>(RetTy)) {
// check if it's complex
- if (RetTy->getNumContainedTypes() == 2) {
- if ((RetTy->getContainedType(0)->isFloatTy()) &&
- (RetTy->getContainedType(1)->isFloatTy())) {
+ if (SRetTy->getNumElements() == 2) {
+ if ((SRetTy->getElementType(0)->isFloatTy()) &&
+ (SRetTy->getElementType(1)->isFloatTy())) {
result = scMips16Helper[stubNum];
- }
- else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
- (RetTy->getContainedType(1)->isDoubleTy())) {
+ } else if ((SRetTy->getElementType(0)->isDoubleTy()) &&
+ (SRetTy->getElementType(1)->isDoubleTy())) {
result = dcMips16Helper[stubNum];
- }
- else {
+ } else {
llvm_unreachable("Uncovered condition");
}
- }
- else {
+ } else {
llvm_unreachable("Uncovered condition");
}
- }
- else {
+ } else {
if (stubNum == 0) {
needHelper = false;
return "";
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 219f1ad33586..efebc99b5dae 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -97,9 +97,9 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool Mips16InstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool Mips16InstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
if (MI.isMoveReg()) {
Dest = &MI.getOperand(0);
Src = &MI.getOperand(1);
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
index 8190be6187ea..6a802e4cce5d 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
@@ -53,9 +53,6 @@ public:
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
-
void storeRegToStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -105,6 +102,14 @@ public:
void BuildAddiuSpImm
(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const;
+
+protected:
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another register, return true along with
+ /// the @Source machine operand and the @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
private:
unsigned getAnalyzableBrOpc(unsigned Opc) const override;
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
index e1d08cad88b7..623af570a5e6 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -87,6 +87,7 @@ def OPCODE5_BC1NEZ : OPCODE5<0b01101>;
def OPCODE5_BC2EQZ : OPCODE5<0b01001>;
def OPCODE5_BC2NEZ : OPCODE5<0b01101>;
def OPCODE5_BGEZAL : OPCODE5<0b10001>;
+def OPCODE5_SIGRIE : OPCODE5<0b10111>;
// The next four constants are unnamed in the spec. These names are taken from
// the OPGROUP names they are used with.
def OPCODE5_LDC2 : OPCODE5<0b01110>;
@@ -602,3 +603,12 @@ class SPECIAL3_GINV<bits<2> ginv> : MipsR6Inst {
let Inst{7-6} = ginv;
let Inst{5-0} = 0b111101;
}
+
+class SIGRIE_FM : MipsR6Inst {
+ bits<16> code_;
+
+ let Inst{31-26} = OPGROUP_REGIMM.Value;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = OPCODE5_SIGRIE.Value;
+ let Inst{15-0} = code_;
+}
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index d86fc3f658ae..2bd0cf2d59a6 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -200,6 +200,8 @@ class CRC32CW_ENC : SPECIAL3_2R_SZ_CRC<2,1>;
class GINVI_ENC : SPECIAL3_GINV<0>;
class GINVT_ENC : SPECIAL3_GINV<2>;
+class SIGRIE_ENC : SIGRIE_FM;
+
//===----------------------------------------------------------------------===//
//
// Instruction Multiclasses
@@ -846,6 +848,14 @@ class GINVI_DESC : GINV_DESC_BASE<"ginvi", GPR32Opnd, II_GINVI> {
}
class GINVT_DESC : GINV_DESC_BASE<"ginvt", GPR32Opnd, II_GINVT>;
+class SIGRIE_DESC {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins uimm16:$code_);
+ string AsmString = "sigrie\t$code_";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_SIGRIE;
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Definitions
@@ -961,6 +971,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
def SEL_S : R6MMR6Rel, SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT;
def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6;
def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
+ def SIGRIE : SIGRIE_ENC, SIGRIE_DESC, ISA_MIPS32R6;
}
let AdditionalPredicates = [NotInMicroMips] in {
@@ -988,6 +999,7 @@ def : MipsInstAlias<"evp", (EVP ZERO), 0>, ISA_MIPS32R6;
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6;
+def : MipsInstAlias<"sigrie", (SIGRIE 0)>, ISA_MIPS32R6;
def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6, GPR_32;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
index b5317bec70c4..5729182deafb 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -416,6 +416,13 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
// long branches. See the comment in file MipsLongBranch.cpp for detailed
// explanation.
+// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_LUi2Op_64 : PseudoSE<(outs GPR64Opnd:$dst),
+ (ins brtarget:$tgt), []>, GPR_64;
+// Expands to: daddiu $dst, $src, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_DADDiu2Op : PseudoSE<(outs GPR64Opnd:$dst),
+ (ins GPR64Opnd:$src, brtarget:$tgt), []>, GPR_64;
+
// Expands to: daddiu $dst, $src, %PART($tgt - $baltgt)
// where %PART may be %hi or %lo, depending on the relocation kind
// that $tgt is annotated with.
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 2e0c25de2bc8..362431fd42a6 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -561,6 +561,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
O << '$' << MipsInstPrinter::getRegisterName(Reg);
return false;
}
+ break;
}
case 'w':
// Print MSA registers for the 'f' constraint
@@ -1203,18 +1204,23 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// Emit .dtprelword or .dtpreldword directive
// and value for debug thread local expression.
-void MipsAsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
- unsigned Size) const {
- switch (Size) {
- case 4:
- OutStreamer->EmitDTPRel32Value(Value);
- break;
- case 8:
- OutStreamer->EmitDTPRel64Value(Value);
- break;
- default:
- llvm_unreachable("Unexpected size of expression value.");
+void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const {
+ if (auto *MipsExpr = dyn_cast<MipsMCExpr>(Value)) {
+ if (MipsExpr && MipsExpr->getKind() == MipsMCExpr::MEK_DTPREL) {
+ switch (Size) {
+ case 4:
+ OutStreamer->EmitDTPRel32Value(MipsExpr->getSubExpr());
+ break;
+ case 8:
+ OutStreamer->EmitDTPRel64Value(MipsExpr->getSubExpr());
+ break;
+ default:
+ llvm_unreachable("Unexpected size of expression value.");
+ }
+ return;
+ }
}
+ AsmPrinter::EmitDebugValue(Value, Size);
}
// Align all targets of indirect branches on bundle size. Used only if target
@@ -1240,8 +1246,12 @@ void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) {
bool MipsAsmPrinter::isLongBranchPseudo(int Opcode) const {
return (Opcode == Mips::LONG_BRANCH_LUi
+ || Opcode == Mips::LONG_BRANCH_LUi2Op
+ || Opcode == Mips::LONG_BRANCH_LUi2Op_64
|| Opcode == Mips::LONG_BRANCH_ADDiu
- || Opcode == Mips::LONG_BRANCH_DADDiu);
+ || Opcode == Mips::LONG_BRANCH_ADDiu2Op
+ || Opcode == Mips::LONG_BRANCH_DADDiu
+ || Opcode == Mips::LONG_BRANCH_DADDiu2Op);
}
// Force static initialization.
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
index 999b6f896bae..eb58234e3e77 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -160,7 +160,7 @@ public:
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
- void EmitDebugThreadLocal(const MCExpr *Value, unsigned Size) const override;
+ void EmitDebugValue(const MCExpr *Value, unsigned Size) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
index af936e6fc96b..e59267c4fd9b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -128,6 +128,7 @@ struct MBBInfo {
uint64_t Size = 0;
bool HasLongBranch = false;
MachineInstr *Br = nullptr;
+ uint64_t Offset = 0;
MBBInfo() = default;
};
@@ -154,8 +155,11 @@ private:
void splitMBB(MachineBasicBlock *MBB);
void initMBBInfo();
int64_t computeOffset(const MachineInstr *Br);
+ uint64_t computeOffsetFromTheBeginning(int MBB);
void replaceBranch(MachineBasicBlock &MBB, Iter Br, const DebugLoc &DL,
MachineBasicBlock *MBBOpnd);
+ bool buildProperJumpMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Pos, DebugLoc DL);
void expandToLongBranch(MBBInfo &Info);
bool handleForbiddenSlot();
bool handlePossibleLongBranch();
@@ -167,7 +171,6 @@ private:
SmallVector<MBBInfo, 16> MBBInfos;
bool IsPIC;
MipsABIInfo ABI;
- unsigned LongBranchSeqSize;
bool ForceLongBranchFirstPass = false;
};
@@ -176,7 +179,7 @@ private:
char MipsBranchExpansion::ID = 0;
INITIALIZE_PASS(MipsBranchExpansion, DEBUG_TYPE,
- "Expand out of range branch instructions and prevent forbidden"
+ "Expand out of range branch instructions and fix forbidden"
" slot hazards",
false, false)
@@ -268,7 +271,8 @@ void MipsBranchExpansion::splitMBB(MachineBasicBlock *MBB) {
// Insert NewMBB and fix control flow.
MachineBasicBlock *Tgt = getTargetMBB(*FirstBr);
NewMBB->transferSuccessors(MBB);
- NewMBB->removeSuccessor(Tgt, true);
+ if (Tgt != getTargetMBB(*LastBr))
+ NewMBB->removeSuccessor(Tgt, true);
MBB->addSuccessor(NewMBB);
MBB->addSuccessor(Tgt);
MFp->insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
@@ -294,14 +298,6 @@ void MipsBranchExpansion::initMBBInfo() {
for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin();
MI != MBB->instr_end(); ++MI)
MBBInfos[I].Size += TII->getInstSizeInBytes(*MI);
-
- // Search for MBB's branch instruction.
- ReverseIter End = MBB->rend();
- ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
-
- if ((Br != End) && !Br->isIndirectBranch() &&
- (Br->isConditionalBranch() || (Br->isUnconditionalBranch() && IsPIC)))
- MBBInfos[I].Br = &*Br;
}
}
@@ -326,6 +322,14 @@ int64_t MipsBranchExpansion::computeOffset(const MachineInstr *Br) {
return -Offset + 4;
}
+// Returns the distance in bytes from the beginning of the function up to MBB.
+uint64_t MipsBranchExpansion::computeOffsetFromTheBeginning(int MBB) {
+ uint64_t Offset = 0;
+ for (int N = 0; N < MBB; ++N)
+ Offset += MBBInfos[N].Size;
+ return Offset;
+}
+
// Replace Br with a branch which has the opposite condition code and a
// MachineBasicBlock operand MBBOpnd.
void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br,
@@ -359,6 +363,35 @@ void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br,
Br->eraseFromParent();
}
+bool MipsBranchExpansion::buildProperJumpMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Pos,
+ DebugLoc DL) {
+ bool HasR6 = ABI.IsN64() ? STI->hasMips64r6() : STI->hasMips32r6();
+ bool AddImm = HasR6 && !STI->useIndirectJumpsHazard();
+
+ unsigned JR = ABI.IsN64() ? Mips::JR64 : Mips::JR;
+ unsigned JIC = ABI.IsN64() ? Mips::JIC64 : Mips::JIC;
+ unsigned JR_HB = ABI.IsN64() ? Mips::JR_HB64 : Mips::JR_HB;
+ unsigned JR_HB_R6 = ABI.IsN64() ? Mips::JR_HB64_R6 : Mips::JR_HB_R6;
+
+ unsigned JumpOp;
+ if (STI->useIndirectJumpsHazard())
+ JumpOp = HasR6 ? JR_HB_R6 : JR_HB;
+ else
+ JumpOp = HasR6 ? JIC : JR;
+
+ if (JumpOp == Mips::JIC && STI->inMicroMipsMode())
+ JumpOp = Mips::JIC_MMR6;
+
+ unsigned ATReg = ABI.IsN64() ? Mips::AT_64 : Mips::AT;
+ MachineInstrBuilder Instr =
+ BuildMI(*MBB, Pos, DL, TII->get(JumpOp)).addReg(ATReg);
+ if (AddImm)
+ Instr.addImm(0);
+
+ return !AddImm;
+}
+
// Expand branch instructions to long branches.
// TODO: This function has to be fixed for beqz16 and bnez16, because it
// currently assumes that all branches have 16-bit offsets, and will produce
@@ -479,33 +512,21 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
// In NaCl, modifying the sp is not allowed in branch delay slot.
// For MIPS32R6, we can skip using a delay slot branch.
- if (STI->isTargetNaCl() ||
- (STI->hasMips32r6() && !STI->useIndirectJumpsHazard()))
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
+ bool hasDelaySlot = buildProperJumpMI(BalTgtMBB, Pos, DL);
+
+ if (STI->isTargetNaCl() || !hasDelaySlot) {
+ BuildMI(*BalTgtMBB, std::prev(Pos), DL, TII->get(Mips::ADDiu), Mips::SP)
.addReg(Mips::SP)
.addImm(8);
-
- if (STI->hasMips32r6() && !STI->useIndirectJumpsHazard()) {
- const unsigned JICOp =
- STI->inMicroMipsMode() ? Mips::JIC_MMR6 : Mips::JIC;
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(JICOp))
- .addReg(Mips::AT)
- .addImm(0);
-
- } else {
- unsigned JROp =
- STI->useIndirectJumpsHazard()
- ? (STI->hasMips32r6() ? Mips::JR_HB_R6 : Mips::JR_HB)
- : Mips::JR;
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT);
-
+ }
+ if (hasDelaySlot) {
if (STI->isTargetNaCl()) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP));
- } else
+ } else {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
.addReg(Mips::SP)
.addImm(8);
-
+ }
BalTgtMBB->rbegin()->bundleWithPred();
}
} else {
@@ -597,46 +618,94 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP_64)
.addImm(0);
- if (STI->hasMips64r6() && !STI->useIndirectJumpsHazard()) {
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
+ bool hasDelaySlot = buildProperJumpMI(BalTgtMBB, Pos, DL);
+ // If there is no delay slot, insert the stack adjustment before the jump.
+ if (!hasDelaySlot) {
+ BuildMI(*BalTgtMBB, std::prev(Pos), DL, TII->get(Mips::DADDiu),
+ Mips::SP_64)
.addReg(Mips::SP_64)
.addImm(16);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JIC64))
- .addReg(Mips::AT_64)
- .addImm(0);
} else {
- unsigned JROp =
- STI->useIndirectJumpsHazard()
- ? (STI->hasMips32r6() ? Mips::JR_HB64_R6 : Mips::JR_HB64)
- : Mips::JR64;
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT_64);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
.addReg(Mips::SP_64)
.addImm(16);
BalTgtMBB->rbegin()->bundleWithPred();
}
}
-
- assert(LongBrMBB->size() + BalTgtMBB->size() == LongBranchSeqSize);
- } else {
- // Pre R6: R6:
- // $longbr: $longbr:
- // j $tgt bc $tgt
- // nop $fallthrough
- // $fallthrough:
- //
+ } else { // Not PIC
Pos = LongBrMBB->begin();
LongBrMBB->addSuccessor(TgtMBB);
- if (STI->hasMips32r6())
+
+ // Compute the position of the potential jump instruction (size of the
+ // basic blocks before it + 4 for the instruction itself).
+ uint64_t JOffset = computeOffsetFromTheBeginning(MBB->getNumber()) +
+ MBBInfos[MBB->getNumber()].Size + 4;
+ uint64_t TgtMBBOffset = computeOffsetFromTheBeginning(TgtMBB->getNumber());
+ // If it's a forward jump, then TgtMBBOffset will be shifted by two
+ // instructions
+ if (JOffset < TgtMBBOffset)
+ TgtMBBOffset += 2 * 4;
+ // Compare the upper 4 bits to check whether it's in the same segment.
+ bool SameSegmentJump = JOffset >> 28 == TgtMBBOffset >> 28;
+
+ if (STI->hasMips32r6() && TII->isBranchOffsetInRange(Mips::BC, I.Offset)) {
+ // R6:
+ // $longbr:
+ // bc $tgt
+ // $fallthrough:
+ //
BuildMI(*LongBrMBB, Pos, DL,
TII->get(STI->inMicroMipsMode() ? Mips::BC_MMR6 : Mips::BC))
.addMBB(TgtMBB);
- else
+ } else if (SameSegmentJump) {
+ // Pre R6:
+ // $longbr:
+ // j $tgt
+ // nop
+ // $fallthrough:
+ //
MIBundleBuilder(*LongBrMBB, Pos)
.append(BuildMI(*MFp, DL, TII->get(Mips::J)).addMBB(TgtMBB))
.append(BuildMI(*MFp, DL, TII->get(Mips::NOP)));
-
- assert(LongBrMBB->size() == LongBranchSeqSize);
+ } else {
+ // At this point, the offset we need to branch to does not fit into the
+ // immediate field of the branch instruction and is not in the same
+ // segment as the jump instruction. Therefore we break it into a couple of
+ // instructions: first load the offset into a register, and then do a
+ // branch to that register.
+ if (ABI.IsN64()) {
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi2Op_64),
+ Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_HIGHEST);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu2Op),
+ Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_HIGHER);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addImm(16);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu2Op),
+ Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_ABS_HI);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addImm(16);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu2Op),
+ Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_ABS_LO);
+ } else {
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi2Op),
+ Mips::AT)
+ .addMBB(TgtMBB, MipsII::MO_ABS_HI);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_ADDiu2Op),
+ Mips::AT)
+ .addReg(Mips::AT)
+ .addMBB(TgtMBB, MipsII::MO_ABS_LO);
+ }
+ buildProperJumpMI(LongBrMBB, Pos, DL);
+ }
}
if (I.Br->isUnconditionalBranch()) {
@@ -666,8 +735,6 @@ bool MipsBranchExpansion::handleForbiddenSlot() {
if (!STI->hasMips32r6() || STI->inMicroMipsMode())
return false;
- const MipsInstrInfo *TII = STI->getInstrInfo();
-
bool Changed = false;
for (MachineFunction::iterator FI = MFp->begin(); FI != MFp->end(); ++FI) {
@@ -704,66 +771,65 @@ bool MipsBranchExpansion::handleForbiddenSlot() {
}
bool MipsBranchExpansion::handlePossibleLongBranch() {
-
- LongBranchSeqSize = IsPIC ? ((ABI.IsN64() || STI->isTargetNaCl()) ? 10 : 9)
- : (STI->hasMips32r6() ? 1 : 2);
-
if (STI->inMips16Mode() || !STI->enableLongBranchPass())
return false;
if (SkipLongBranch)
return false;
- initMBBInfo();
-
- SmallVectorImpl<MBBInfo>::iterator I, E = MBBInfos.end();
bool EverMadeChange = false, MadeChange = true;
while (MadeChange) {
MadeChange = false;
- for (I = MBBInfos.begin(); I != E; ++I) {
- // Skip if this MBB doesn't have a branch or the branch has already been
- // converted to a long branch.
- if (!I->Br || I->HasLongBranch)
- continue;
+ initMBBInfo();
- int64_t Offset = computeOffset(I->Br);
+ for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) {
+ MachineBasicBlock *MBB = MFp->getBlockNumbered(I);
+ // Search for MBB's branch instruction.
+ ReverseIter End = MBB->rend();
+ ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
- if (STI->isTargetNaCl()) {
- // The offset calculation does not include sandboxing instructions
- // that will be added later in the MC layer. Since at this point we
- // don't know the exact amount of code that "sandboxing" will add, we
- // conservatively estimate that code will not grow more than 100%.
- Offset *= 2;
- }
+ if ((Br != End) && Br->isBranch() && !Br->isIndirectBranch() &&
+ (Br->isConditionalBranch() ||
+ (Br->isUnconditionalBranch() && IsPIC))) {
+ int64_t Offset = computeOffset(&*Br);
- // Check if offset fits into the immediate field of the branch.
- if (!ForceLongBranchFirstPass &&
- TII->isBranchOffsetInRange(I->Br->getOpcode(), Offset))
- continue;
+ if (STI->isTargetNaCl()) {
+ // The offset calculation does not include sandboxing instructions
+ // that will be added later in the MC layer. Since at this point we
+ // don't know the exact amount of code that "sandboxing" will add, we
+ // conservatively estimate that code will not grow more than 100%.
+ Offset *= 2;
+ }
- I->HasLongBranch = true;
- I->Size += LongBranchSeqSize * 4;
- ++LongBranches;
- EverMadeChange = MadeChange = true;
- }
- }
+ if (ForceLongBranchFirstPass ||
+ !TII->isBranchOffsetInRange(Br->getOpcode(), Offset)) {
+ MBBInfos[I].Offset = Offset;
+ MBBInfos[I].Br = &*Br;
+ }
+ }
+ } // End for
- ForceLongBranchFirstPass = false;
+ ForceLongBranchFirstPass = false;
- if (!EverMadeChange)
- return false;
+ SmallVectorImpl<MBBInfo>::iterator I, E = MBBInfos.end();
+
+ for (I = MBBInfos.begin(); I != E; ++I) {
+ // Skip if this MBB doesn't have a branch or the branch has already been
+ // converted to a long branch.
+ if (!I->Br)
+ continue;
- // Do the expansion.
- for (I = MBBInfos.begin(); I != E; ++I)
- if (I->HasLongBranch) {
expandToLongBranch(*I);
+ ++LongBranches;
+ EverMadeChange = MadeChange = true;
}
- MFp->RenumberBlocks();
+ MFp->RenumberBlocks();
+ }
- return true;
+ return EverMadeChange;
}
bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
index 81a1cced93b7..90cb3f437bd5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
@@ -24,10 +24,10 @@ static bool isF128SoftLibCall(const char *CallSym) {
"__lttf2", "__multf3", "__netf2", "__powitf2",
"__subtf3", "__trunctfdf2", "__trunctfsf2", "__unordtf2",
"ceill", "copysignl", "cosl", "exp2l",
- "expl", "floorl", "fmal", "fmodl",
- "log10l", "log2l", "logl", "nearbyintl",
- "powl", "rintl", "roundl", "sinl",
- "sqrtl", "truncl"};
+ "expl", "floorl", "fmal", "fmaxl",
+ "fmodl", "log10l", "log2l", "logl",
+ "nearbyintl", "powl", "rintl", "roundl",
+ "sinl", "sqrtl", "truncl"};
// Check that LibCalls is sorted alphabetically.
auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
index a705ebb6b193..c550fadf6632 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -16,6 +16,7 @@
#include "MipsCallLowering.h"
#include "MipsCCState.h"
#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;
@@ -23,48 +24,89 @@ using namespace llvm;
MipsCallLowering::MipsCallLowering(const MipsTargetLowering &TLI)
: CallLowering(&TLI) {}
-bool MipsCallLowering::MipsHandler::assign(const CCValAssign &VA,
- unsigned vreg) {
+bool MipsCallLowering::MipsHandler::assign(unsigned VReg,
+ const CCValAssign &VA) {
if (VA.isRegLoc()) {
- assignValueToReg(vreg, VA.getLocReg());
+ assignValueToReg(VReg, VA);
} else if (VA.isMemLoc()) {
- unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
- unsigned Offset = VA.getLocMemOffset();
- MachinePointerInfo MPO;
- unsigned StackAddr = getStackAddress(Size, Offset, MPO);
- assignValueToAddress(vreg, StackAddr, Size, MPO);
+ assignValueToAddress(VReg, VA);
} else {
return false;
}
return true;
}
+bool MipsCallLowering::MipsHandler::assignVRegs(ArrayRef<unsigned> VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex) {
+ for (unsigned i = 0; i < VRegs.size(); ++i)
+ if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i]))
+ return false;
+ return true;
+}
+
+void MipsCallLowering::MipsHandler::setLeastSignificantFirst(
+ SmallVectorImpl<unsigned> &VRegs) {
+ if (!MIRBuilder.getMF().getDataLayout().isLittleEndian())
+ std::reverse(VRegs.begin(), VRegs.end());
+}
+
+bool MipsCallLowering::MipsHandler::handle(
+ ArrayRef<CCValAssign> ArgLocs, ArrayRef<CallLowering::ArgInfo> Args) {
+ SmallVector<unsigned, 4> VRegs;
+ unsigned SplitLength;
+ const Function &F = MIRBuilder.getMF().getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const MipsTargetLowering &TLI = *static_cast<const MipsTargetLowering *>(
+ MIRBuilder.getMF().getSubtarget().getTargetLowering());
+
+ for (unsigned ArgsIndex = 0, ArgLocsIndex = 0; ArgsIndex < Args.size();
+ ++ArgsIndex, ArgLocsIndex += SplitLength) {
+ EVT VT = TLI.getValueType(DL, Args[ArgsIndex].Ty);
+ SplitLength = TLI.getNumRegistersForCallingConv(F.getContext(),
+ F.getCallingConv(), VT);
+ if (SplitLength > 1) {
+ VRegs.clear();
+ MVT RegisterVT = TLI.getRegisterTypeForCallingConv(
+ F.getContext(), F.getCallingConv(), VT);
+ for (unsigned i = 0; i < SplitLength; ++i)
+ VRegs.push_back(MRI.createGenericVirtualRegister(LLT{RegisterVT}));
+
+ if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Reg))
+ return false;
+ } else {
+ if (!assign(Args[ArgsIndex].Reg, ArgLocs[ArgLocsIndex]))
+ return false;
+ }
+ }
+ return true;
+}
+
namespace {
class IncomingValueHandler : public MipsCallLowering::MipsHandler {
public:
IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
: MipsHandler(MIRBuilder, MRI) {}
- bool handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args);
-
private:
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg) override;
+ void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override;
+
+ unsigned getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) override;
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) override;
+ void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) override;
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
- MachinePointerInfo &MPO) override;
+ bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs, unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) override;
virtual void markPhysRegUsed(unsigned PhysReg) {
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
- void buildLoad(unsigned Val, unsigned Addr, uint64_t Size, unsigned Alignment,
- MachinePointerInfo &MPO) {
- MachineMemOperand *MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOLoad, Size, Alignment);
+ void buildLoad(unsigned Val, const CCValAssign &VA) {
+ MachineMemOperand *MMO;
+ unsigned Addr = getStackAddress(VA, MMO);
MIRBuilder.buildLoad(Val, Addr, *MMO);
}
};
@@ -86,17 +128,34 @@ private:
} // end anonymous namespace
void IncomingValueHandler::assignValueToReg(unsigned ValVReg,
- unsigned PhysReg) {
- MIRBuilder.buildCopy(ValVReg, PhysReg);
+ const CCValAssign &VA) {
+ unsigned PhysReg = VA.getLocReg();
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::SExt:
+ case CCValAssign::LocInfo::ZExt:
+ case CCValAssign::LocInfo::AExt: {
+ auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ break;
+ }
+ default:
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ break;
+ }
markPhysRegUsed(PhysReg);
}
-unsigned IncomingValueHandler::getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) {
+unsigned IncomingValueHandler::getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) {
+ unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+ unsigned Offset = VA.getLocMemOffset();
MachineFrameInfo &MFI = MIRBuilder.getMF().getFrameInfo();
int FI = MFI.CreateFixedObject(Size, Offset, true);
- MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ MachinePointerInfo MPO =
+ MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ MMO = MIRBuilder.getMF().getMachineMemOperand(MPO, MachineMemOperand::MOLoad,
+ Size, /* Alignment */ 0);
unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 32));
MIRBuilder.buildFrameIndex(AddrReg, FI);
@@ -104,19 +163,26 @@ unsigned IncomingValueHandler::getStackAddress(uint64_t Size, int64_t Offset,
return AddrReg;
}
-void IncomingValueHandler::assignValueToAddress(unsigned ValVReg, unsigned Addr,
- uint64_t Size,
- MachinePointerInfo &MPO) {
- // If the value is not extended, a simple load will suffice.
- buildLoad(ValVReg, Addr, Size, /* Alignment */ 0, MPO);
+void IncomingValueHandler::assignValueToAddress(unsigned ValVReg,
+ const CCValAssign &VA) {
+ if (VA.getLocInfo() == CCValAssign::SExt ||
+ VA.getLocInfo() == CCValAssign::ZExt ||
+ VA.getLocInfo() == CCValAssign::AExt) {
+ unsigned LoadReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ buildLoad(LoadReg, VA);
+ MIRBuilder.buildTrunc(ValVReg, LoadReg);
+ } else
+ buildLoad(ValVReg, VA);
}
-bool IncomingValueHandler::handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args) {
- for (unsigned i = 0, ArgsSize = Args.size(); i < ArgsSize; ++i) {
- if (!assign(ArgLocs[i], Args[i].Reg))
- return false;
- }
+bool IncomingValueHandler::handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) {
+ if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex))
+ return false;
+ setLeastSignificantFirst(VRegs);
+ MIRBuilder.buildMerge(ArgsReg, VRegs);
return true;
}
@@ -127,103 +193,179 @@ public:
MachineInstrBuilder &MIB)
: MipsHandler(MIRBuilder, MRI), MIB(MIB) {}
- bool handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args);
-
private:
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg) override;
+ void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override;
+
+ unsigned getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) override;
+
+ void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) override;
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) override;
+ bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs, unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) override;
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
- MachinePointerInfo &MPO) override;
+ unsigned extendRegister(unsigned ValReg, const CCValAssign &VA);
MachineInstrBuilder &MIB;
};
} // end anonymous namespace
void OutgoingValueHandler::assignValueToReg(unsigned ValVReg,
- unsigned PhysReg) {
- MIRBuilder.buildCopy(PhysReg, ValVReg);
+ const CCValAssign &VA) {
+ unsigned PhysReg = VA.getLocReg();
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
-unsigned OutgoingValueHandler::getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) {
+unsigned OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) {
LLT p0 = LLT::pointer(0, 32);
LLT s32 = LLT::scalar(32);
unsigned SPReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildCopy(SPReg, Mips::SP);
unsigned OffsetReg = MRI.createGenericVirtualRegister(s32);
+ unsigned Offset = VA.getLocMemOffset();
MIRBuilder.buildConstant(OffsetReg, Offset);
unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
- MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ MachinePointerInfo MPO =
+ MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+ MMO = MIRBuilder.getMF().getMachineMemOperand(MPO, MachineMemOperand::MOStore,
+ Size, /* Alignment */ 0);
+
return AddrReg;
}
-void OutgoingValueHandler::assignValueToAddress(unsigned ValVReg, unsigned Addr,
- uint64_t Size,
- MachinePointerInfo &MPO) {
- MachineMemOperand *MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOStore, Size, /* Alignment */ 0);
- MIRBuilder.buildStore(ValVReg, Addr, *MMO);
+void OutgoingValueHandler::assignValueToAddress(unsigned ValVReg,
+ const CCValAssign &VA) {
+ MachineMemOperand *MMO;
+ unsigned Addr = getStackAddress(VA, MMO);
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
-bool OutgoingValueHandler::handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args) {
- for (unsigned i = 0; i < Args.size(); ++i) {
- if (!assign(ArgLocs[i], Args[i].Reg))
- return false;
+unsigned OutgoingValueHandler::extendRegister(unsigned ValReg,
+ const CCValAssign &VA) {
+ LLT LocTy{VA.getLocVT()};
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt: {
+ unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildSExt(ExtReg, ValReg);
+ return ExtReg;
+ }
+ case CCValAssign::ZExt: {
+ unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildZExt(ExtReg, ValReg);
+ return ExtReg;
}
+ case CCValAssign::AExt: {
+ unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildAnyExt(ExtReg, ValReg);
+ return ExtReg;
+ }
+ // TODO : handle upper extends
+ case CCValAssign::Full:
+ return ValReg;
+ default:
+ break;
+ }
+ llvm_unreachable("unable to extend register");
+}
+
+bool OutgoingValueHandler::handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) {
+ MIRBuilder.buildUnmerge(VRegs, ArgsReg);
+ setLeastSignificantFirst(VRegs);
+ if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex))
+ return false;
+
return true;
}
static bool isSupportedType(Type *T) {
- if (T->isIntegerTy() && T->getScalarSizeInBits() == 32)
+ if (T->isIntegerTy())
return true;
if (T->isPointerTy())
return true;
return false;
}
+static CCValAssign::LocInfo determineLocInfo(const MVT RegisterVT, const EVT VT,
+ const ISD::ArgFlagsTy &Flags) {
+ // '>' does not mean loss of information here (RegisterVT can't hold VT);
+ // it means that type VT is split into multiple registers of type RegisterVT.
+ if (VT.getSizeInBits() >= RegisterVT.getSizeInBits())
+ return CCValAssign::LocInfo::Full;
+ if (Flags.isSExt())
+ return CCValAssign::LocInfo::SExt;
+ if (Flags.isZExt())
+ return CCValAssign::LocInfo::ZExt;
+ return CCValAssign::LocInfo::AExt;
+}
+
+template <typename T>
+static void setLocInfo(SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<T> &Arguments) {
+ for (unsigned i = 0; i < ArgLocs.size(); ++i) {
+ const CCValAssign &VA = ArgLocs[i];
+ CCValAssign::LocInfo LocInfo = determineLocInfo(
+ Arguments[i].VT, Arguments[i].ArgVT, Arguments[i].Flags);
+ if (VA.isMemLoc())
+ ArgLocs[i] =
+ CCValAssign::getMem(VA.getValNo(), VA.getValVT(),
+ VA.getLocMemOffset(), VA.getLocVT(), LocInfo);
+ else
+ ArgLocs[i] = CCValAssign::getReg(VA.getValNo(), VA.getValVT(),
+ VA.getLocReg(), VA.getLocVT(), LocInfo);
+ }
+}
+
bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(Mips::RetRA);
- if (Val != nullptr) {
- if (!isSupportedType(Val->getType()))
- return false;
+ if (Val != nullptr && !isSupportedType(Val->getType()))
+ return false;
+ if (!VRegs.empty()) {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = MF.getDataLayout();
const MipsTargetLowering &TLI = *getTLI<MipsTargetLowering>();
+ LLVMContext &Ctx = Val->getType()->getContext();
+
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> RetInfos;
SmallVector<unsigned, 8> OrigArgIndices;
- ArgInfo ArgRetInfo(VReg, Val->getType());
- setArgFlags(ArgRetInfo, AttributeList::ReturnIndex, DL, F);
- splitToValueTypes(ArgRetInfo, 0, RetInfos, OrigArgIndices);
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+ splitToValueTypes(CurArgInfo, 0, RetInfos, OrigArgIndices);
+ }
SmallVector<ISD::OutputArg, 8> Outs;
- subTargetRegTypeForCallingConv(
- MIRBuilder, RetInfos, OrigArgIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used,
- unsigned origIdx, unsigned partOffs) {
- Outs.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, RetInfos, OrigArgIndices, Outs);
SmallVector<CCValAssign, 16> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
F.getContext());
CCInfo.AnalyzeReturn(Outs, TLI.CCAssignFnForReturn());
+ setLocInfo(ArgLocs, Outs);
OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
if (!RetHandler.handle(ArgLocs, RetInfos)) {
@@ -266,12 +408,7 @@ bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
}
SmallVector<ISD::InputArg, 8> Ins;
- subTargetRegTypeForCallingConv(
- MIRBuilder, ArgInfos, OrigArgIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used, unsigned origIdx,
- unsigned partOffs) {
- Ins.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, ArgInfos, OrigArgIndices, Ins);
SmallVector<CCValAssign, 16> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
@@ -283,6 +420,7 @@ bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(F.getCallingConv()),
1);
CCInfo.AnalyzeFormalArguments(Ins, TLI.CCAssignFnForCall());
+ setLocInfo(ArgLocs, Ins);
IncomingValueHandler Handler(MIRBuilder, MF.getRegInfo());
if (!Handler.handle(ArgLocs, ArgInfos))
@@ -347,12 +485,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
}
SmallVector<ISD::OutputArg, 8> Outs;
- subTargetRegTypeForCallingConv(
- MIRBuilder, ArgInfos, OrigArgIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used, unsigned origIdx,
- unsigned partOffs) {
- Outs.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, ArgInfos, OrigArgIndices, Outs);
SmallVector<CCValAssign, 8> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
@@ -361,6 +494,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
const char *Call = Callee.isSymbol() ? Callee.getSymbolName() : nullptr;
CCInfo.AnalyzeCallOperands(Outs, TLI.CCAssignFnForCall(), FuncOrigArgs, Call);
+ setLocInfo(ArgLocs, Outs);
OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), MIB);
if (!RetHandler.handle(ArgLocs, ArgInfos)) {
@@ -383,18 +517,14 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
splitToValueTypes(OrigRet, 0, ArgInfos, OrigRetIndices);
SmallVector<ISD::InputArg, 8> Ins;
- subTargetRegTypeForCallingConv(
- MIRBuilder, ArgInfos, OrigRetIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used,
- unsigned origIdx, unsigned partOffs) {
- Ins.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, ArgInfos, OrigRetIndices, Ins);
SmallVector<CCValAssign, 8> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
F.getContext());
CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), OrigRet.Ty, Call);
+ setLocInfo(ArgLocs, Ins);
CallReturnHandler Handler(MIRBuilder, MF.getRegInfo(), MIB);
if (!Handler.handle(ArgLocs, ArgInfos))
@@ -406,11 +536,10 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return true;
}
+template <typename T>
void MipsCallLowering::subTargetRegTypeForCallingConv(
- MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
- ArrayRef<unsigned> OrigArgIndices, const FunTy &PushBack) const {
- MachineFunction &MF = MIRBuilder.getMF();
- const Function &F = MF.getFunction();
+ const Function &F, ArrayRef<ArgInfo> Args,
+ ArrayRef<unsigned> OrigArgIndices, SmallVectorImpl<T> &ISDArgs) const {
const DataLayout &DL = F.getParent()->getDataLayout();
const MipsTargetLowering &TLI = *getTLI<MipsTargetLowering>();
@@ -420,12 +549,20 @@ void MipsCallLowering::subTargetRegTypeForCallingConv(
EVT VT = TLI.getValueType(DL, Arg.Ty);
MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(),
F.getCallingConv(), VT);
+ unsigned NumRegs = TLI.getNumRegistersForCallingConv(
+ F.getContext(), F.getCallingConv(), VT);
- ISD::ArgFlagsTy Flags = Arg.Flags;
- Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ ISD::ArgFlagsTy Flags = Arg.Flags;
- PushBack(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo], 0);
+ if (i == 0)
+ Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
+ else
+ Flags.setOrigAlign(1);
+ ISDArgs.emplace_back(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo],
+ 0);
+ }
++ArgNo;
}
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallLowering.h b/contrib/llvm/lib/Target/Mips/MipsCallLowering.h
index e23c10cec563..9916b04ef50c 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsCallLowering.h
@@ -31,27 +31,38 @@ public:
virtual ~MipsHandler() = default;
+ bool handle(ArrayRef<CCValAssign> ArgLocs,
+ ArrayRef<CallLowering::ArgInfo> Args);
+
protected:
- bool assign(const CCValAssign &VA, unsigned vreg);
+ bool assignVRegs(ArrayRef<unsigned> VRegs, ArrayRef<CCValAssign> ArgLocs,
+ unsigned Index);
+
+ void setLeastSignificantFirst(SmallVectorImpl<unsigned> &VRegs);
MachineIRBuilder &MIRBuilder;
MachineRegisterInfo &MRI;
private:
- virtual unsigned getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) = 0;
+ bool assign(unsigned VReg, const CCValAssign &VA);
+
+ virtual unsigned getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) = 0;
- virtual void assignValueToReg(unsigned ValVReg, unsigned PhysReg) = 0;
+ virtual void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) = 0;
- virtual void assignValueToAddress(unsigned ValVReg, unsigned Addr,
- uint64_t Size,
- MachinePointerInfo &MPO) = 0;
+ virtual void assignValueToAddress(unsigned ValVReg,
+ const CCValAssign &VA) = 0;
+
+ virtual bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex, unsigned ArgsReg) = 0;
};
MipsCallLowering(const MipsTargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
@@ -61,21 +72,16 @@ public:
ArrayRef<ArgInfo> OrigArgs) const override;
private:
- using FunTy =
- std::function<void(ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used,
- unsigned origIdx, unsigned partOffs)>;
-
/// Based on registers available on target machine split or extend
/// type if needed, also change pointer type to appropriate integer
- /// type. Lambda will fill some info so we can tell MipsCCState to
- /// assign physical registers.
- void subTargetRegTypeForCallingConv(MachineIRBuilder &MIRBuilder,
- ArrayRef<ArgInfo> Args,
+ /// type.
+ template <typename T>
+ void subTargetRegTypeForCallingConv(const Function &F, ArrayRef<ArgInfo> Args,
ArrayRef<unsigned> OrigArgIndices,
- const FunTy &PushBack) const;
+ SmallVectorImpl<T> &ISDArgs) const;
/// Split structures and arrays, save original argument indices since
- /// Mips calling conv needs info about original argument type.
+ /// Mips calling convention needs info about original argument type.
void splitToValueTypes(const ArgInfo &OrigArg, unsigned OriginalIndex,
SmallVectorImpl<ArgInfo> &SplitArgs,
SmallVectorImpl<unsigned> &SplitArgsOrigIndices) const;
diff --git a/contrib/llvm/lib/Target/Mips/MipsCondMov.td b/contrib/llvm/lib/Target/Mips/MipsCondMov.td
index 39dc2654aa6a..0d7e3e200b5f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCondMov.td
+++ b/contrib/llvm/lib/Target/Mips/MipsCondMov.td
@@ -296,3 +296,13 @@ def PseudoSELECTFP_F_I64 : SelectFP_Pseudo_F<GPR64Opnd>;
def PseudoSELECTFP_F_S : SelectFP_Pseudo_F<FGR32Opnd>;
def PseudoSELECTFP_F_D32 : SelectFP_Pseudo_F<AFGR64Opnd>, FGR_32;
def PseudoSELECTFP_F_D64 : SelectFP_Pseudo_F<FGR64Opnd>, FGR_64;
+
+let usesCustomInserter = 1 in {
+class D_SELECT_CLASS<RegisterOperand RC> :
+ PseudoSE<(outs RC:$dst1, RC:$dst2),
+ (ins GPR32Opnd:$cond, RC:$a1, RC:$a2, RC:$b1, RC:$b2), []>,
+ ISA_MIPS1_NOT_4_32;
+}
+
+def PseudoD_SELECT_I : D_SELECT_CLASS<GPR32Opnd>;
+def PseudoD_SELECT_I64 : D_SELECT_CLASS<GPR64Opnd>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 33f03b954a8c..e3823e0dfdb8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -728,9 +728,10 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
(Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
continue;
- // Instructions LWP/SWP should not be in a delay slot as that
+ // Instructions LWP/SWP and MOVEP should not be in a delay slot as that
// results in unpredictable behaviour
- if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM))
+ if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM ||
+ Opcode == Mips::MOVEP_MM))
continue;
Filler = CurrI;
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 0677d378a115..8c2a364cdfa9 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -1396,6 +1396,9 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case Mips::PseudoSELECTFP_T_D32:
case Mips::PseudoSELECTFP_T_D64:
return emitPseudoSELECT(MI, BB, true, Mips::BC1T);
+ case Mips::PseudoD_SELECT_I:
+ case Mips::PseudoD_SELECT_I64:
+ return emitPseudoD_SELECT(MI, BB);
}
}
@@ -2427,6 +2430,16 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32));
SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi,
DAG.getConstant(VT.getSizeInBits() - 1, DL, VT));
+
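+  // Without conditional moves (pre-MIPS IV / pre-MIPS32), emit a single
+  // PseudoD_SELECT covering both halves instead of two separate SELECT nodes,
+  // so that only one branch diamond is generated later.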
+ if (!(Subtarget.hasMips4() || Subtarget.hasMips32())) {
+ SDVTList VTList = DAG.getVTList(VT, VT);
+ return DAG.getNode(Subtarget.isGP64bit() ? Mips::PseudoD_SELECT_I64
+ : Mips::PseudoD_SELECT_I,
+ DL, VTList, Cond, ShiftRightHi,
+ IsSRA ? Ext : DAG.getConstant(0, DL, VT), Or,
+ ShiftRightHi);
+ }
+
Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or);
Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond,
IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi);
@@ -2563,10 +2576,12 @@ static SDValue lowerUnalignedIntStore(StoreSDNode *SD, SelectionDAG &DAG,
}
// Lower (store (fp_to_sint $fp) $ptr) to (store (TruncIntFP $fp), $ptr).
-static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG) {
+static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG,
+ bool SingleFloat) {
SDValue Val = SD->getValue();
- if (Val.getOpcode() != ISD::FP_TO_SINT)
+ if (Val.getOpcode() != ISD::FP_TO_SINT ||
+ (Val.getValueSizeInBits() > 32 && SingleFloat))
return SDValue();
EVT FPTy = EVT::getFloatingPointVT(Val.getValueSizeInBits());
@@ -2587,7 +2602,7 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
((MemVT == MVT::i32) || (MemVT == MVT::i64)))
return lowerUnalignedIntStore(SD, DAG, Subtarget.isLittle());
- return lowerFP_TO_SINT_STORE(SD, DAG);
+ return lowerFP_TO_SINT_STORE(SD, DAG, Subtarget.isSingleFloat());
}
SDValue MipsTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
@@ -2603,6 +2618,9 @@ SDValue MipsTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
+ if (Op.getValueSizeInBits() > 32 && Subtarget.isSingleFloat())
+ return SDValue();
+
EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
SDValue Trunc = DAG.getNode(MipsISD::TruncIntFP, SDLoc(Op), FPTy,
Op.getOperand(0));
@@ -4340,6 +4358,81 @@ MachineBasicBlock *MipsTargetLowering::emitPseudoSELECT(MachineInstr &MI,
return BB;
}
+MachineBasicBlock *MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ assert(!(Subtarget.hasMips4() || Subtarget.hasMips32()) &&
+         "Subtarget already supports SELECT nodes with the use of "
+ "conditional-move instructions.");
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+  // D_SELECT substitutes two SELECT nodes that go one after another and
+  // share the same condition operand. On machines without conditional-move
+  // instructions, this avoids the extra branches that would result from
+  // expanding the two SELECT pseudo instructions into two separate diamond
+  // patterns.
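+  // For example, instead of lowering
+  //   lo = select cond, a, b
+  //   hi = select cond, c, d
+  // through two separate branch diamonds, a single diamond is built here and
+  // two PHI nodes in sinkMBB pick both results at once.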
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+  //   bNE   r1, r0, sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // bne rs, $0, sinkMBB
+ BuildMI(BB, DL, TII->get(Mips::BNE))
+ .addReg(MI.getOperand(2).getReg())
+ .addReg(Mips::ZERO)
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+  // Use two PHI nodes to select the two results.
+ BuildMI(*BB, BB->begin(), DL, TII->get(Mips::PHI), MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(3).getReg())
+ .addMBB(thisMBB)
+ .addReg(MI.getOperand(5).getReg())
+ .addMBB(copy0MBB);
+ BuildMI(*BB, BB->begin(), DL, TII->get(Mips::PHI), MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(4).getReg())
+ .addMBB(thisMBB)
+ .addReg(MI.getOperand(6).getReg())
+ .addMBB(copy0MBB);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+
+ return BB;
+}
+
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT,
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index 5a0de45c44f3..e043f133a09f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -699,6 +699,8 @@ class TargetRegisterClass;
MachineBasicBlock *emitSEL_D(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB,
bool isFPCmp, unsigned Opc) const;
+ MachineBasicBlock *emitPseudoD_SELECT(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
};
/// Create MipsTargetLowering objects.
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
index e986942ad8fa..4cb8574e08f6 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -130,6 +130,15 @@ class ABSS_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
HARDFLOAT,
NeverHasSideEffects;
+class CVT_PS_S_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fd), (ins SrcRC:$fs, SrcRC:$ft),
+ !strconcat(opstr, "\t$fd, $fs, $ft"),
+ [(set DstRC:$fd, (OpNode SrcRC:$fs, SrcRC:$ft))], Itin, FrmFR, opstr>,
+ HARDFLOAT {
+ let isCommutable = IsComm;
+}
+
multiclass ABSS_M<string opstr, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> {
def _D32 : MMRel, ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, Itin, OpNode>,
@@ -432,6 +441,29 @@ let AdditionalPredicates = [NotInMicroMips] in {
def CVT_D32_W : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x21, 20>, ISA_MIPS1, FGR_32;
}
+
+let DecoderNamespace = "MipsFP64" in {
+ let AdditionalPredicates = [NotInMicroMips] in {
+ def PLL_PS64 : ADDS_FT<"pll.ps", FGR64Opnd, II_CVT, 0>,
+ ADDS_FM<0x2C, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def PLU_PS64 : ADDS_FT<"plu.ps", FGR64Opnd, II_CVT, 0>,
+ ADDS_FM<0x2D, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+
+ def CVT_S_PU64 : ABSS_FT<"cvt.s.pu", FGR32Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x20, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def CVT_S_PL64 : ABSS_FT<"cvt.s.pl", FGR32Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x28, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+
+ def CVT_PS_S64 : CVT_PS_S_FT<"cvt.ps.s", FGR64Opnd, FGR32Opnd, II_CVT, 0>,
+ ADDS_FM<0x26, 16>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ }
+}
+
let DecoderNamespace = "MipsFP64" in {
let AdditionalPredicates = [NotInMicroMips] in {
def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>,
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 0e0e712dba19..bfb4c775205d 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -280,6 +280,8 @@ bool MipsInstrInfo::isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset)
switch (BranchOpc) {
case Mips::B:
case Mips::BAL:
+ case Mips::BAL_BR:
+ case Mips::BAL_BR_MM:
case Mips::BC1F:
case Mips::BC1FL:
case Mips::BC1T:
@@ -661,8 +663,7 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
}
MIB.copyImplicitOps(*I);
-
- MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
+ MIB.cloneMemRefs(*I);
return MIB;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index 0faa13d4d63f..d9398b7d6024 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -2002,13 +2002,19 @@ let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
// branches. See the comment in file MipsLongBranch.cpp for detailed
// explanation.
-// Expands to: lui $dst, %hi($tgt - $baltgt)
+// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt - $baltgt)
def LONG_BRANCH_LUi : PseudoSE<(outs GPR32Opnd:$dst),
(ins brtarget:$tgt, brtarget:$baltgt), []>;
+// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_LUi2Op : PseudoSE<(outs GPR32Opnd:$dst),
+ (ins brtarget:$tgt), []>;
-// Expands to: addiu $dst, $src, %lo($tgt - $baltgt)
+// Expands to: addiu $dst, $src, %highest/%higher/%hi/%lo($tgt - $baltgt)
def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst),
(ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>;
+// Expands to: addiu $dst, $src, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_ADDiu2Op : PseudoSE<(outs GPR32Opnd:$dst),
+ (ins GPR32Opnd:$src, brtarget:$tgt), []>;
//===----------------------------------------------------------------------===//
// Instruction definition
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index 6c5b83021f74..b041590ee343 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -15,6 +15,7 @@
#include "MipsRegisterBankInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#define DEBUG_TYPE "mips-isel"
@@ -144,6 +145,42 @@ bool MipsInstructionSelector::select(MachineInstr &I,
.addMemOperand(*I.memoperands_begin());
break;
}
+ case G_UDIV:
+ case G_UREM:
+ case G_SDIV:
+ case G_SREM: {
+ unsigned HILOReg = MRI.createVirtualRegister(&Mips::ACC64RegClass);
+ bool IsSigned = I.getOpcode() == G_SREM || I.getOpcode() == G_SDIV;
+ bool IsDiv = I.getOpcode() == G_UDIV || I.getOpcode() == G_SDIV;
+
+ MachineInstr *PseudoDIV, *PseudoMove;
+ PseudoDIV = BuildMI(MBB, I, I.getDebugLoc(),
+ TII.get(IsSigned ? Mips::PseudoSDIV : Mips::PseudoUDIV))
+ .addDef(HILOReg)
+ .add(I.getOperand(1))
+ .add(I.getOperand(2));
+ if (!constrainSelectedInstRegOperands(*PseudoDIV, TII, TRI, RBI))
+ return false;
+
+ PseudoMove = BuildMI(MBB, I, I.getDebugLoc(),
+ TII.get(IsDiv ? Mips::PseudoMFLO : Mips::PseudoMFHI))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(HILOReg);
+ if (!constrainSelectedInstRegOperands(*PseudoMove, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
+ case G_SELECT: {
+ // Handle operands with pointer type.
+ MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::MOVN_I_I))
+ .add(I.getOperand(0))
+ .add(I.getOperand(2))
+ .add(I.getOperand(1))
+ .add(I.getOperand(3));
+ break;
+ }
case G_CONSTANT: {
int Imm = I.getOperand(1).getCImm()->getValue().getLimitedValue();
unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
@@ -193,7 +230,85 @@ bool MipsInstructionSelector::select(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case G_ICMP: {
+ struct Instr {
+ unsigned Opcode, Def, LHS, RHS;
+ Instr(unsigned Opcode, unsigned Def, unsigned LHS, unsigned RHS)
+ : Opcode(Opcode), Def(Def), LHS(LHS), RHS(RHS){};
+
+ bool hasImm() const {
+ if (Opcode == Mips::SLTiu || Opcode == Mips::XORi)
+ return true;
+ return false;
+ }
+ };
+
+ SmallVector<struct Instr, 2> Instructions;
+ unsigned ICMPReg = I.getOperand(0).getReg();
+ unsigned Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ unsigned LHS = I.getOperand(2).getReg();
+ unsigned RHS = I.getOperand(3).getReg();
+ CmpInst::Predicate Cond =
+ static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+
+ switch (Cond) {
+ case CmpInst::ICMP_EQ: // LHS == RHS -> (LHS ^ RHS) < 1
+ Instructions.emplace_back(Mips::XOR, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::SLTiu, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_NE: // LHS != RHS -> 0 < (LHS ^ RHS)
+ Instructions.emplace_back(Mips::XOR, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::SLTu, ICMPReg, Mips::ZERO, Temp);
+ break;
+ case CmpInst::ICMP_UGT: // LHS > RHS -> RHS < LHS
+ Instructions.emplace_back(Mips::SLTu, ICMPReg, RHS, LHS);
+ break;
+ case CmpInst::ICMP_UGE: // LHS >= RHS -> !(LHS < RHS)
+ Instructions.emplace_back(Mips::SLTu, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_ULT: // LHS < RHS -> LHS < RHS
+ Instructions.emplace_back(Mips::SLTu, ICMPReg, LHS, RHS);
+ break;
+ case CmpInst::ICMP_ULE: // LHS <= RHS -> !(RHS < LHS)
+ Instructions.emplace_back(Mips::SLTu, Temp, RHS, LHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_SGT: // LHS > RHS -> RHS < LHS
+ Instructions.emplace_back(Mips::SLT, ICMPReg, RHS, LHS);
+ break;
+ case CmpInst::ICMP_SGE: // LHS >= RHS -> !(LHS < RHS)
+ Instructions.emplace_back(Mips::SLT, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_SLT: // LHS < RHS -> LHS < RHS
+ Instructions.emplace_back(Mips::SLT, ICMPReg, LHS, RHS);
+ break;
+ case CmpInst::ICMP_SLE: // LHS <= RHS -> !(RHS < LHS)
+ Instructions.emplace_back(Mips::SLT, Temp, RHS, LHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ default:
+ return false;
+ }
+
+ MachineIRBuilder B(I);
+ for (const struct Instr &Instruction : Instructions) {
+ MachineInstrBuilder MIB = B.buildInstr(
+ Instruction.Opcode, {Instruction.Def}, {Instruction.LHS});
+
+ if (Instruction.hasImm())
+ MIB.addImm(Instruction.RHS);
+ else
+ MIB.addUse(Instruction.RHS);
+ if (!MIB.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+
+ I.eraseFromParent();
+ return true;
+ }
default:
return false;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index fb259516be09..c629f02af00e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -13,23 +13,53 @@
#include "MipsLegalizerInfo.h"
#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
using namespace llvm;
MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
using namespace TargetOpcode;
+ const LLT s1 = LLT::scalar(1);
const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
const LLT p0 = LLT::pointer(0, 32);
- getActionDefinitionsBuilder(G_ADD).legalFor({s32});
+ getActionDefinitionsBuilder(G_ADD)
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+
+ getActionDefinitionsBuilder(G_UADDE)
+ .lowerFor({{s32, s1}});
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.legalForCartesianProduct({p0, s32}, {p0});
- getActionDefinitionsBuilder(G_CONSTANT)
+ getActionDefinitionsBuilder(G_SELECT)
+ .legalForCartesianProduct({p0, s32}, {s32})
+ .minScalar(0, s32)
+ .minScalar(1, s32);
+
+ getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+
+ getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
.legalFor({s32});
+ getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UREM, G_UDIV})
+ .legalFor({s32})
+ .minScalar(0, s32)
+ .libcallFor({s64});
+
+ getActionDefinitionsBuilder(G_ICMP)
+ .legalFor({{s32, s32}})
+ .minScalar(0, s32);
+
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+
getActionDefinitionsBuilder(G_GEP)
.legalFor({{p0, s32}});
@@ -42,3 +72,15 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
computeTables();
verify(*ST.getInstrInfo());
}
+
+bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
+
+ using namespace TargetOpcode;
+
+ MIRBuilder.setInstr(MI);
+
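+  // No custom legalization rules are implemented yet; returning false reports
+  // that the instruction could not be legalized here.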
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h
index 36dd39c8c1c1..75fadd6cf613 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_MIPS_MIPSMACHINELEGALIZER_H
#define LLVM_LIB_TARGET_MIPS_MIPSMACHINELEGALIZER_H
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
namespace llvm {
@@ -24,6 +25,10 @@ class MipsSubtarget;
class MipsLegalizerInfo : public LegalizerInfo {
public:
MipsLegalizerInfo(const MipsSubtarget &ST);
+
+ bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
};
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
index 2b7f64099923..46b37ceae391 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -298,12 +298,16 @@ bool MipsMCInstLower::lowerLongBranch(const MachineInstr *MI,
default:
return false;
case Mips::LONG_BRANCH_LUi:
+ case Mips::LONG_BRANCH_LUi2Op:
+ case Mips::LONG_BRANCH_LUi2Op_64:
lowerLongBranchLUi(MI, OutMI);
return true;
case Mips::LONG_BRANCH_ADDiu:
+ case Mips::LONG_BRANCH_ADDiu2Op:
lowerLongBranchADDiu(MI, OutMI, Mips::ADDiu);
return true;
case Mips::LONG_BRANCH_DADDiu:
+ case Mips::LONG_BRANCH_DADDiu2Op:
lowerLongBranchADDiu(MI, OutMI, Mips::DADDiu);
return true;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index d83f75ffa1c1..eecc7c573df1 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -107,6 +107,18 @@ class vfsetcc_type<ValueType ResTy, ValueType OpTy, CondCode CC> :
(ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>;
// ISD::SETFALSE cannot occur
+def vfseteq_v4f32 : vfsetcc_type<v4i32, v4f32, SETEQ>;
+def vfseteq_v2f64 : vfsetcc_type<v2i64, v2f64, SETEQ>;
+def vfsetge_v4f32 : vfsetcc_type<v4i32, v4f32, SETGE>;
+def vfsetge_v2f64 : vfsetcc_type<v2i64, v2f64, SETGE>;
+def vfsetgt_v4f32 : vfsetcc_type<v4i32, v4f32, SETGT>;
+def vfsetgt_v2f64 : vfsetcc_type<v2i64, v2f64, SETGT>;
+def vfsetle_v4f32 : vfsetcc_type<v4i32, v4f32, SETLE>;
+def vfsetle_v2f64 : vfsetcc_type<v2i64, v2f64, SETLE>;
+def vfsetlt_v4f32 : vfsetcc_type<v4i32, v4f32, SETLT>;
+def vfsetlt_v2f64 : vfsetcc_type<v2i64, v2f64, SETLT>;
+def vfsetne_v4f32 : vfsetcc_type<v4i32, v4f32, SETNE>;
+def vfsetne_v2f64 : vfsetcc_type<v2i64, v2f64, SETNE>;
def vfsetoeq_v4f32 : vfsetcc_type<v4i32, v4f32, SETOEQ>;
def vfsetoeq_v2f64 : vfsetcc_type<v2i64, v2f64, SETOEQ>;
def vfsetoge_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGE>;
@@ -4038,3 +4050,20 @@ def : MSAPat<
(SPLAT_D v2f64:$ws,
(COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
sub_64))>;
+
+def : MSAPat<(vfseteq_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCEQ_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfseteq_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCEQ_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
+def : MSAPat<(vfsetle_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCLE_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfsetle_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCLE_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
+def : MSAPat<(vfsetlt_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCLT_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfsetlt_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCLT_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
+def : MSAPat<(vfsetne_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCNE_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfsetne_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCNE_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..1cff1c8396ea
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -0,0 +1,92 @@
+//=== lib/Target/Mips/MipsPreLegalizerCombiner.cpp ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+#define DEBUG_TYPE "mips-prelegalizer-combiner"
+
+using namespace llvm;
+
+namespace {
+class MipsPreLegalizerCombinerInfo : public CombinerInfo {
+public:
+ MipsPreLegalizerCombinerInfo()
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr) {}
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
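+  // No Mips-specific combines are implemented yet; returning false leaves the
+  // instruction untouched and lets the generic pipeline proceed.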
+ return false;
+}
+
+// Pass boilerplate
+// ================
+
+class MipsPreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MipsPreLegalizerCombiner();
+
+ StringRef getPassName() const override { return "MipsPreLegalizerCombiner"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+} // end anonymous namespace
+
+void MipsPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MipsPreLegalizerCombiner::MipsPreLegalizerCombiner() : MachineFunctionPass(ID) {
+ initializeMipsPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool MipsPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ MipsPreLegalizerCombinerInfo PCInfo;
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, nullptr);
+}
+
+char MipsPreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(MipsPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine Mips machine instrs before legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(MipsPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine Mips machine instrs before legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createMipsPreLegalizeCombiner() {
+ return new MipsPreLegalizerCombiner();
+}
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 351135079217..6af1f10189df 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -57,7 +57,10 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass(
switch (RC.getID()) {
case Mips::GPR32RegClassID:
case Mips::CPU16Regs_and_GPRMM16ZeroRegClassID:
+ case Mips::GPRMM16MovePPairFirstRegClassID:
+ case Mips::CPU16Regs_and_GPRMM16MovePPairSecondRegClassID:
case Mips::GPRMM16MoveP_and_CPU16Regs_and_GPRMM16ZeroRegClassID:
+ case Mips::GPRMM16MovePPairFirst_and_GPRMM16MovePPairSecondRegClassID:
case Mips::SP32RegClassID:
return getRegBank(Mips::GPRBRegBankID);
default:
@@ -84,6 +87,16 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_LOAD:
case G_STORE:
case G_GEP:
+ case G_AND:
+ case G_OR:
+ case G_XOR:
+ case G_SHL:
+ case G_ASHR:
+ case G_LSHR:
+ case G_SDIV:
+ case G_UDIV:
+ case G_SREM:
+ case G_UREM:
OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
break;
case G_CONSTANT:
@@ -92,6 +105,19 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr});
break;
+ case G_ICMP:
+ OperandsMapping =
+ getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr,
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ break;
+ case G_SELECT:
+ OperandsMapping =
+ getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ break;
default:
return getInvalidInstructionMapping();
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
index 4cc50fb981ba..b84aaad05eb5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -57,8 +57,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
index c85ee20273c0..a943a0ad4094 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -335,6 +335,16 @@ def GPRMM16MoveP : RegisterClass<"Mips", [i32], 32, (add
// Callee save
S0, S2, S3, S4)>;
+def GPRMM16MovePPairFirst : RegisterClass<"Mips", [i32], 32, (add
+ // Arguments
+ A0, A1, A2)>;
+
+def GPRMM16MovePPairSecond : RegisterClass<"Mips", [i32], 32, (add
+ // Arguments
+ A1, A2, A3,
+ // Callee save
+ S5, S6)>;
+
def GPR64 : RegisterClass<"Mips", [i64], 64, (add
// Reserved
ZERO_64, AT_64,
@@ -522,6 +532,16 @@ def GPRMM16AsmOperandMoveP : MipsAsmRegOperand {
let PredicateMethod = "isMM16AsmRegMoveP";
}
+def GPRMM16AsmOperandMovePPairFirst : MipsAsmRegOperand {
+ let Name = "GPRMM16AsmRegMovePPairFirst";
+ let PredicateMethod = "isMM16AsmRegMovePPairFirst";
+}
+
+def GPRMM16AsmOperandMovePPairSecond : MipsAsmRegOperand {
+ let Name = "GPRMM16AsmRegMovePPairSecond";
+ let PredicateMethod = "isMM16AsmRegMovePPairSecond";
+}
+
def ACC64DSPAsmOperand : MipsAsmRegOperand {
let Name = "ACC64DSPAsmReg";
let PredicateMethod = "isACCAsmReg";
@@ -613,6 +633,14 @@ def GPRMM16OpndMoveP : RegisterOperand<GPRMM16MoveP> {
let EncoderMethod = "getMovePRegSingleOpValue";
}
+def GPRMM16OpndMovePPairFirst : RegisterOperand<GPRMM16MovePPairFirst> {
+ let ParserMatchClass = GPRMM16AsmOperandMovePPairFirst;
+}
+
+def GPRMM16OpndMovePPairSecond : RegisterOperand<GPRMM16MovePPairSecond> {
+ let ParserMatchClass = GPRMM16AsmOperandMovePPairSecond;
+}
+
def GPR64Opnd : RegisterOperand<GPR64> {
let ParserMatchClass = GPR64AsmOperand;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index cf2899dd375e..cf196b597278 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -244,7 +244,7 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true));
break;
}
- // fallthrough
+ LLVM_FALLTHROUGH;
case Mips::BuildPairF64:
case Mips::ExtractElementF64:
if (Subtarget->isABI_FPXX() && !Subtarget->hasMTHC1())
@@ -795,6 +795,24 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
switch(Opcode) {
default: break;
+ case Mips::PseudoD_SELECT_I:
+ case Mips::PseudoD_SELECT_I64: {
+ MVT VT = Subtarget->isGP64bit() ? MVT::i64 : MVT::i32;
+ SDValue cond = Node->getOperand(0);
+ SDValue Hi1 = Node->getOperand(1);
+ SDValue Lo1 = Node->getOperand(2);
+ SDValue Hi2 = Node->getOperand(3);
+ SDValue Lo2 = Node->getOperand(4);
+
+ SDValue ops[] = {cond, Hi1, Lo1, Hi2, Lo2};
+ EVT NodeTys[] = {VT, VT};
+ ReplaceNode(Node, CurDAG->getMachineNode(Subtarget->isGP64bit()
+ ? Mips::PseudoD_SELECT_I64
+ : Mips::PseudoD_SELECT_I,
+ DL, NodeTys, ops));
+ return true;
+ }
+
case ISD::ADDE: {
selectAddE(Node, DL);
return true;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index f625a2903bd7..a78e544c35f0 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -158,8 +158,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
@@ -2360,24 +2360,6 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
}
}
-/// Check if the given BuildVectorSDNode is a splat.
-/// This method currently relies on DAG nodes being reused when equivalent,
-/// so it's possible for this to return false even when isConstantSplat returns
-/// true.
-static bool isSplatVector(const BuildVectorSDNode *N) {
- unsigned int nOps = N->getNumOperands();
- assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector");
-
- SDValue Operand0 = N->getOperand(0);
-
- for (unsigned int i = 1; i < nOps; ++i) {
- if (N->getOperand(i) != Operand0)
- return false;
- }
-
- return true;
-}
-
// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
@@ -2488,7 +2470,7 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
return Result;
- } else if (isSplatVector(Node))
+ } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
return Op;
else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
// Use INSERT_VECTOR_ELT operations rather than expand to stores.
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index e8589fc53492..c7ab90ed2a3b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -222,9 +222,9 @@ static bool isReadOrWriteToDSPReg(const MachineInstr &MI, bool &isWrite) {
/// We check for the common case of 'or', as it's MIPS' preferred instruction
/// for GPRs but we have to check the operands to ensure that is the case.
/// Other move instructions for MIPS are directly identifiable.
-bool MipsSEInstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool MipsSEInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
bool isDSPControlWrite = false;
// Condition is made to match the creation of WRDSP/RDDSP copy instruction
// from copyPhysReg function.
@@ -421,12 +421,16 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandERet(MBB, MI);
break;
case Mips::PseudoMFHI:
- Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI;
- expandPseudoMFHiLo(MBB, MI, Opc);
+ expandPseudoMFHiLo(MBB, MI, Mips::MFHI);
+ break;
+ case Mips::PseudoMFHI_MM:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFHI16_MM);
break;
case Mips::PseudoMFLO:
- Opc = isMicroMips ? Mips::MFLO16_MM : Mips::MFLO;
- expandPseudoMFHiLo(MBB, MI, Opc);
+ expandPseudoMFHiLo(MBB, MI, Mips::MFLO);
+ break;
+ case Mips::PseudoMFLO_MM:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFLO16_MM);
break;
case Mips::PseudoMFHI64:
expandPseudoMFHiLo(MBB, MI, Mips::MFHI64);
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index fc55716d598a..fce0fe5f58ad 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -47,9 +47,6 @@ public:
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
-
void storeRegToStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -79,6 +76,13 @@ public:
MachineBasicBlock::iterator II, const DebugLoc &DL,
unsigned *NewImm) const;
+protected:
+  /// If the specific machine instruction is an instruction that moves/copies
+  /// a value from one register to another register, return true along with
+  /// the @Source machine operand and @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
private:
unsigned getAnalyzableBrOpc(unsigned Opc) const override;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
index 64db815a0f4c..410fa655a225 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -154,6 +154,7 @@ def II_DERET : InstrItinClass;
def II_ERETNC : InstrItinClass;
def II_EHB : InstrItinClass;
def II_SDBBP : InstrItinClass;
+def II_SIGRIE : InstrItinClass;
def II_SSNOP : InstrItinClass;
def II_SYSCALL : InstrItinClass;
def II_PAUSE : InstrItinClass;
@@ -546,6 +547,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_ERETNC , [InstrStage<1, [ALU]>]>,
InstrItinData<II_EHB , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SDBBP , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_SIGRIE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SSNOP , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SYSCALL , [InstrStage<1, [ALU]>]>,
InstrItinData<II_PAUSE , [InstrStage<1, [ALU]>]>,
diff --git a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
index 79c55dbb9e03..80ffe7ada7c8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
@@ -179,7 +179,7 @@ def GenericWriteTrap : SchedWriteRes<[GenericIssueCTISTD]>;
def : ItinRW<[GenericWriteTrap], [II_BREAK, II_SYSCALL, II_TEQ, II_TEQI,
II_TGE, II_TGEI, II_TGEIU, II_TGEU, II_TNE,
II_TNEI, II_TLT, II_TLTI, II_TLTU, II_TTLTIU,
- II_TRAP, II_SDBBP]>;
+ II_TRAP, II_SDBBP, II_SIGRIE]>;
// COP0 Pipeline
// =============
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index 896dd0eb0a5e..ad8f4848b870 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -295,8 +295,10 @@ public:
bool inMips16HardFloat() const {
return inMips16Mode() && InMips16HardFloat;
}
- bool inMicroMipsMode() const { return InMicroMipsMode; }
- bool inMicroMips32r6Mode() const { return InMicroMipsMode && hasMips32r6(); }
+ bool inMicroMipsMode() const { return InMicroMipsMode && !InMips16Mode; }
+ bool inMicroMips32r6Mode() const {
+ return inMicroMipsMode() && hasMips32r6();
+ }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
bool hasDSPR3() const { return HasDSPR3; }
@@ -312,14 +314,14 @@ public:
}
bool useSmallSection() const { return UseSmallSection; }
- bool hasStandardEncoding() const { return !inMips16Mode(); }
+ bool hasStandardEncoding() const { return !InMips16Mode && !InMicroMipsMode; }
bool useSoftFloat() const { return IsSoftFloat; }
bool useLongCalls() const { return UseLongCalls; }
bool enableLongBranchPass() const {
- return hasStandardEncoding() || allowMixed16_32();
+ return hasStandardEncoding() || inMicroMipsMode() || allowMixed16_32();
}
/// Features related to the presence of specific instructions.
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 1e6fe2b9f7e7..8466298cf36f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -56,6 +56,7 @@ extern "C" void LLVMInitializeMipsTarget() {
initializeMipsDelaySlotFillerPass(*PR);
initializeMipsBranchExpansionPass(*PR);
initializeMicroMipsSizeReducePass(*PR);
+ initializeMipsPreLegalizerCombinerPass(*PR);
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -101,12 +102,6 @@ static Reloc::Model getEffectiveRelocModel(bool JIT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
// On function prologue, the stack is created by decrementing
// its pointer. Once decremented, all references are done with positive
// offset from the stack/frame pointer, using StackGrowsUp enables
@@ -121,7 +116,7 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
bool isLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(JIT, RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
isLittle(isLittle), TLOF(llvm::make_unique<MipsTargetObjectFile>()),
ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)),
Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this,
@@ -240,8 +235,8 @@ public:
bool addInstSelector() override;
void addPreEmitPass() override;
void addPreRegAlloc() override;
- void addPreEmit2() ;
bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
@@ -286,9 +281,6 @@ MipsTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(BasicTTIImpl(this, F));
}
-void MipsPassConfig::addPreEmit2() {
-}
-
// Implemented by targets that want to run passes immediately before
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
@@ -322,6 +314,10 @@ bool MipsPassConfig::addIRTranslator() {
return false;
}
+void MipsPassConfig::addPreLegalizeMachineIR() {
+ addPass(createMipsPreLegalizeCombiner());
+}
+
bool MipsPassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index f767c8321988..f53ee0631b5e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -10,6 +10,7 @@
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "MCTargetDesc/MipsMCExpr.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -189,6 +190,7 @@ const MCExpr *
MipsTargetObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
const MCExpr *Expr =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- return MCBinaryExpr::createAdd(
+ Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(0x8000, getContext()), getContext());
+ return MipsMCExpr::create(MipsMCExpr::MEK_DTPREL, Expr, getContext());
}
diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index ab494d5bf41b..22be564b6502 100644
--- a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -32,17 +32,18 @@ Target &llvm::getTheMips64elTarget() {
extern "C" void LLVMInitializeMipsTargetInfo() {
RegisterTarget<Triple::mips,
/*HasJIT=*/true>
- X(getTheMipsTarget(), "mips", "Mips", "Mips");
+ X(getTheMipsTarget(), "mips", "MIPS (32-bit big endian)", "Mips");
RegisterTarget<Triple::mipsel,
/*HasJIT=*/true>
- Y(getTheMipselTarget(), "mipsel", "Mipsel", "Mips");
+ Y(getTheMipselTarget(), "mipsel", "MIPS (32-bit little endian)", "Mips");
RegisterTarget<Triple::mips64,
/*HasJIT=*/true>
- A(getTheMips64Target(), "mips64", "Mips64 [experimental]", "Mips");
+ A(getTheMips64Target(), "mips64", "MIPS (64-bit big endian)", "Mips");
RegisterTarget<Triple::mips64el,
/*HasJIT=*/true>
- B(getTheMips64elTarget(), "mips64el", "Mips64el [experimental]", "Mips");
+ B(getTheMips64elTarget(), "mips64el", "MIPS (64-bit little endian)",
+ "Mips");
}
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index aeb90eca3a05..f7b4cf3a0f72 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -25,6 +25,12 @@ NVPTXTargetStreamer::NVPTXTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
NVPTXTargetStreamer::~NVPTXTargetStreamer() = default;
+void NVPTXTargetStreamer::outputDwarfFileDirectives() {
+ for (const std::string &S : DwarfFiles)
+ getStreamer().EmitRawText(S.data());
+ DwarfFiles.clear();
+}
+
void NVPTXTargetStreamer::emitDwarfFileDirective(StringRef Directive) {
DwarfFiles.emplace_back(Directive);
}
@@ -82,9 +88,7 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
OS << "//\t}\n";
if (isDwarfSection(FI, Section)) {
// Emit DWARF .file directives in the outermost scope.
- for (const std::string &S : DwarfFiles)
- getStreamer().EmitRawText(S.data());
- DwarfFiles.clear();
+ outputDwarfFileDirectives();
OS << "//\t.section";
Section->PrintSwitchToSection(*getStreamer().getContext().getAsmInfo(),
FI->getTargetTriple(), OS, SubSection);
@@ -92,3 +96,30 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
OS << "//\t{\n";
}
}
+
+void NVPTXTargetStreamer::emitRawBytes(StringRef Data) {
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+ const char *Directive = MAI->getData8bitsDirective();
+ unsigned NumElements = Data.size();
+ const unsigned MaxLen = 40;
+ unsigned NumChunks = 1 + ((NumElements - 1) / MaxLen);
+  // Split very long directives into several parts, emitting at most MaxLen
+  // byte values per line.
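+  // For example, a 100-byte blob is emitted as three lines of the target's
+  // 8-bit data directive (".b8" in PTX output) holding 40, 40 and 20 values.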
+ for (unsigned I = 0; I < NumChunks; ++I) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ const char *Label = Directive;
+ for (auto It = std::next(Data.bytes_begin(), I * MaxLen),
+ End = (I == NumChunks - 1)
+ ? Data.bytes_end()
+ : std::next(Data.bytes_begin(), (I + 1) * MaxLen);
+ It != End; ++It) {
+ OS << Label << (unsigned)*It;
+ if (Label == Directive)
+ Label = ",";
+ }
+ Streamer.EmitRawText(OS.str());
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
index 30831ab8bbeb..f18e61cdca57 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
@@ -24,6 +24,9 @@ public:
NVPTXTargetStreamer(MCStreamer &S);
~NVPTXTargetStreamer() override;
+ /// Outputs the list of the DWARF '.file' directives to the streamer.
+ void outputDwarfFileDirectives();
+
/// Record DWARF file directives for later output.
/// According to PTX ISA, CUDA Toolkit documentation, 11.5.3. Debugging
/// Directives: .file
@@ -39,6 +42,10 @@ public:
void emitDwarfFileDirective(StringRef Directive) override;
void changeSection(const MCSection *CurSection, MCSection *Section,
const MCExpr *SubSection, raw_ostream &OS) override;
+ /// Emit the bytes in \p Data into the output.
+ ///
+ /// This is used to emit bytes in \p Data as sequence of .byte directives.
+ void emitRawBytes(StringRef Data) override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index 902d1b25e7dd..07bfc58a8da7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -46,13 +46,14 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
-FunctionPass *createNVVMReflectPass();
+FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM);
BasicBlockPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
+MachineFunctionPass *createNVPTXProxyRegErasurePass();
Target &getTheNVPTXTarget32();
Target &getTheNVPTXTarget64();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
index 6494c46f54ab..3731b2f37f6c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
@@ -54,6 +54,8 @@ def SM70 : SubtargetFeature<"sm_70", "SmVersion", "70",
"Target SM 7.0">;
def SM72 : SubtargetFeature<"sm_72", "SmVersion", "72",
"Target SM 7.2">;
+def SM75 : SubtargetFeature<"sm_75", "SmVersion", "75",
+ "Target SM 7.5">;
// PTX Versions
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
@@ -72,6 +74,8 @@ def PTX60 : SubtargetFeature<"ptx60", "PTXVersion", "60",
"Use PTX version 6.0">;
def PTX61 : SubtargetFeature<"ptx61", "PTXVersion", "61",
"Use PTX version 6.1">;
+def PTX63 : SubtargetFeature<"ptx63", "PTXVersion", "63",
+ "Use PTX version 6.3">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -94,6 +98,7 @@ def : Proc<"sm_61", [SM61, PTX50]>;
def : Proc<"sm_62", [SM62, PTX50]>;
def : Proc<"sm_70", [SM70, PTX60]>;
def : Proc<"sm_72", [SM72, PTX61]>;
+def : Proc<"sm_75", [SM75, PTX63]>;
def NVPTXInstrInfo : InstrInfo {
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index bed52293197d..bf922eb8a195 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -41,7 +41,7 @@ public:
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
bool functionModified = false;
Function::iterator I = function.begin();
- TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
+ Instruction *firstTerminatorInst = (I++)->getTerminator();
for (Function::iterator E = function.end(); I != E; ++I) {
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index a966b9928400..6284ad8b82e8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "InstPrinter/NVPTXInstPrinter.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "MCTargetDesc/NVPTXTargetStreamer.h"
#include "NVPTX.h"
#include "NVPTXMCExpr.h"
#include "NVPTXMachineFunctionInfo.h"
@@ -199,7 +200,7 @@ bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
// Ewwww
- TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
+ LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
const char *Sym = MFI->getImageHandleSymbol(Index);
@@ -218,11 +219,12 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
return;
}
+ const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
- if (!nvptxSubtarget->hasImageHandles()) {
+ if (!STI.hasImageHandles()) {
if (lowerImageHandleOperand(MI, i, MCOp)) {
OutMI.addOperand(MCOp);
continue;
@@ -328,11 +330,12 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
- const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
+ const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
+ const TargetLowering *TLI = STI.getTargetLowering();
Type *Ty = F->getReturnType();
- bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
if (Ty->getTypeID() == Type::VoidTyID)
return;
@@ -473,7 +476,6 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
}
bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
- nvptxSubtarget = &F.getSubtarget<NVPTXSubtarget>();
bool Result = AsmPrinter::runOnMachineFunction(F);
// Emit closing brace for the body of function F.
// The closing brace must be emitted here because we need to emit additional
@@ -507,8 +509,9 @@ void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
OutStreamer->AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
+ const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
OutStreamer->AddComment(Twine("implicit-def: ") +
- nvptxSubtarget->getRegisterInfo()->getName(RegNo));
+ STI.getRegisterInfo()->getName(RegNo));
}
OutStreamer->AddBlankLine();
}
@@ -727,6 +730,11 @@ void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = &*FI;
+ if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
+ emitDeclaration(F, O);
+ continue;
+ }
+
if (F->isDeclaration()) {
if (F->use_empty())
continue;
@@ -785,11 +793,8 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
// Construct a default subtarget off of the TargetMachine defaults. The
// rest of NVPTX isn't friendly to change subtargets per function and
// so the default TargetMachine will have all of the options.
- const Triple &TT = TM.getTargetTriple();
- StringRef CPU = TM.getTargetCPU();
- StringRef FS = TM.getTargetFeatureString();
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
- const NVPTXSubtarget STI(TT, CPU, FS, NTM);
+ const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
if (M.alias_size()) {
report_fatal_error("Module has aliases, which NVPTX does not support.");
@@ -813,7 +818,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
// Emit header before any dwarf directives are emitted below.
- emitHeader(M, OS1, STI);
+ emitHeader(M, OS1, *STI);
OutStreamer->EmitRawText(OS1.str());
// Emit module-level inline asm if it exists.
@@ -880,8 +885,22 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
if (NTM.getDrvInterface() == NVPTX::NVCL)
O << ", texmode_independent";
+ bool HasFullDebugInfo = false;
+ for (DICompileUnit *CU : M.debug_compile_units()) {
+ switch(CU->getEmissionKind()) {
+ case DICompileUnit::NoDebug:
+ case DICompileUnit::DebugDirectivesOnly:
+ break;
+ case DICompileUnit::LineTablesOnly:
+ case DICompileUnit::FullDebug:
+ HasFullDebugInfo = true;
+ break;
+ }
+ if (HasFullDebugInfo)
+ break;
+ }
// FIXME: remove comment once debug info is properly supported.
- if (MMI && MMI->hasDebugInfo())
+ if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
O << "//, debug";
O << "\n";
@@ -938,6 +957,10 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
if (HasDebugInfo)
OutStreamer->EmitRawText("//\t}");
+ // Output last DWARF .file directives, if any.
+ static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
+ ->outputDwarfFileDirectives();
+
return ret;
//bool Result = AsmPrinter::doFinalization(M);
@@ -1412,12 +1435,14 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
const AttributeList &PAL = F->getAttributes();
- const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
+ const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
+ const TargetLowering *TLI = STI.getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
bool isKernelFunc = isKernelFunction(*F);
- bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
+ bool hasImageHandles = STI.hasImageHandles();
MVT thePointerTy = TLI->getPointerTy(DL);
if (F->arg_empty()) {
@@ -1441,7 +1466,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isImage(*I)) {
std::string sname = I->getName();
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
- if (nvptxSubtarget->hasImageHandles())
+ if (hasImageHandles)
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
@@ -1449,7 +1474,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "_param_" << paramIndex;
}
else { // Default image is read_only
- if (nvptxSubtarget->hasImageHandles())
+ if (hasImageHandles)
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
@@ -1457,7 +1482,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "_param_" << paramIndex;
}
} else {
- if (nvptxSubtarget->hasImageHandles())
+ if (hasImageHandles)
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index efe98003b1c8..44a09f5fe513 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -258,9 +258,6 @@ private:
typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
VRegRCMap VRegMapping;
- // Cache the subtarget here.
- const NVPTXSubtarget *nvptxSubtarget;
-
// List of variables demoted to a function scope.
std::map<const Function *, std::vector<const GlobalVariable *>> localDecls;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 21939d836dc7..ffc6a59cd6c8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -981,9 +981,8 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
if (!NVPTXLD)
return false;
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});
ReplaceNode(N, NVPTXLD);
return true;
@@ -1221,9 +1220,8 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
}
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
ReplaceNode(N, LD);
return true;
@@ -1659,9 +1657,8 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
}
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = Mem->getMemOperand();
- cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = Mem->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
// For automatic generation of LDG (through SelectLoad[Vector], not the
// intrinsics), we may have an extending load like:
@@ -1864,9 +1861,8 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!NVPTXST)
return false;
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
ReplaceNode(N, NVPTXST);
return true;
}
@@ -2088,9 +2084,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
ReplaceNode(N, ST);
return true;
@@ -2236,9 +2231,8 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
return false;
SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
ReplaceNode(N, Ret);
return true;
@@ -2341,9 +2335,8 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
SDNode *Ret =
CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
ReplaceNode(N, Ret);
return true;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 2536623fb853..bec8ece29050 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -180,6 +180,18 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
return;
}
+ // Given a struct type, recursively traverse the elements with custom ComputePTXValueVTs.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ auto const *SL = DL.getStructLayout(STy);
+ auto ElementNum = 0;
+ for(auto *EI : STy->elements()) {
+ ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(ElementNum));
+ ++ElementNum;
+ }
+ return;
+ }
+
ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
EVT VT = TempVTs[i];
@@ -560,8 +572,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
}
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
// No FPOW or FREM in PTX.
@@ -651,6 +663,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::CallSeqEnd";
case NVPTXISD::CallPrototype:
return "NVPTXISD::CallPrototype";
+ case NVPTXISD::ProxyReg:
+ return "NVPTXISD::ProxyReg";
case NVPTXISD::LoadV2:
return "NVPTXISD::LoadV2";
case NVPTXISD::LoadV4:
@@ -1170,7 +1184,7 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
TargetLoweringBase::LegalizeTypeAction
-NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
+NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
return TypeSplitVector;
if (VT == MVT::v2f16)
@@ -1649,7 +1663,24 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- if (!Func) {
+ // Both indirect calls and libcalls have nullptr Func. In order to distinguish
+ // between them we must rely on the call site value which is valid for
+ // indirect calls but is always null for libcalls.
+ bool isIndirectCall = !Func && CS;
+
+ if (isa<ExternalSymbolSDNode>(Callee)) {
+ Function* CalleeFunc = nullptr;
+
+ // Try to find the callee in the current module.
+ Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
+ assert(CalleeFunc != nullptr && "Libcall callee must be set.");
+
+ // Set the "libcall callee" attribute to indicate that the function
+ // must always have a declaration.
+ CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
+ }
+
+ if (isIndirectCall) {
// This is indirect function call case : PTX requires a prototype of the
// form
// proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
@@ -1673,7 +1704,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
};
// We model convergent calls as separate opcodes.
- unsigned Opcode = Func ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall;
+ unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
if (CLI.IsConvergent)
Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
: NVPTXISD::PrintConvergentCall;
@@ -1707,12 +1738,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgEndOps[] = { Chain,
- DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
+ DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
InFlag };
Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
InFlag = Chain.getValue(1);
- if (!Func) {
+ if (isIndirectCall) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue PrototypeOps[] = { Chain,
DAG.getConstant(uniqueCallSite, dl, MVT::i32),
@@ -1721,6 +1752,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
}
+ SmallVector<SDValue, 16> ProxyRegOps;
+ SmallVector<Optional<MVT>, 16> ProxyRegTruncates;
+
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
SmallVector<EVT, 16> VTs;
@@ -1791,11 +1825,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineMemOperand::MOLoad);
for (unsigned j = 0; j < NumElts; ++j) {
- SDValue Ret = RetVal.getValue(j);
+ ProxyRegOps.push_back(RetVal.getValue(j));
+
if (needTruncate)
- Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret);
- InVals.push_back(Ret);
+ ProxyRegTruncates.push_back(Optional<MVT>(Ins[VecIdx + j].VT));
+ else
+ ProxyRegTruncates.push_back(Optional<MVT>());
}
+
Chain = RetVal.getValue(NumElts);
InFlag = RetVal.getValue(NumElts + 1);
@@ -1811,8 +1848,29 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
true),
InFlag, dl);
+ InFlag = Chain.getValue(1);
uniqueCallSite++;
+ // Append ProxyReg instructions to the chain to make sure that `callseq_end`
+ // will not get lost. Otherwise, during libcalls expansion, the nodes can become
+ // dangling.
+ for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
+ SDValue Ret = DAG.getNode(
+ NVPTXISD::ProxyReg, dl,
+ DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
+ { Chain, ProxyRegOps[i], InFlag }
+ );
+
+ Chain = Ret.getValue(1);
+ InFlag = Ret.getValue(2);
+
+ if (ProxyRegTruncates[i].hasValue()) {
+ Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret);
+ }
+
+ InVals.push_back(Ret);
+ }
+
// set isTailCall to false for now, until we figure out how to express
// tail call optimization in PTX
isTailCall = false;
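
The ComputePTXValueVTs change at the top of this file's diff flattens aggregate types element by element, passing StartingOffset plus each element's offset from the struct layout. A standalone illustration with a hypothetical host struct, not from the patch; the exact offsets depend on the ABI/DataLayout, and on a typical LP64 layout this prints 0, 4, 8:

    #include <cstddef>
    #include <cstdio>

    struct Payload {          // hypothetical example type
      int a;                  // offset 0
      char b;                 // offset 4 (right after the 4-byte int)
      long long c;            // offset 8 (padded up to 8-byte alignment)
    };

    int main() {
      std::printf("a=%zu b=%zu c=%zu\n",
                  offsetof(Payload, a), offsetof(Payload, b), offsetof(Payload, c));
    }
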
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index ef04a8573d45..66fab2b6f480 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -51,6 +51,7 @@ enum NodeType : unsigned {
CallSeqBegin,
CallSeqEnd,
CallPrototype,
+ ProxyReg,
FUN_SHFL_CLAMP,
FUN_SHFR_CLAMP,
MUL_WIDE_SIGNED,
@@ -511,7 +512,7 @@ public:
}
TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const override;
+ getPreferredVectorAction(MVT VT) const override;
// Get the degree of precision we want from 32-bit floating point division
// operations.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 443b077184c7..02a40b9f5262 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1318,9 +1318,6 @@ def ROTR64reg_sw :
// Create SDNodes so they can be used in the DAG code, e.g.
// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
-def SDTIntShiftDOp :
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisInt<3>]>;
def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
@@ -1888,6 +1885,7 @@ def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
+def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
def DeclareParam :
SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
@@ -1975,6 +1973,9 @@ def PseudoUseParam :
def RETURNNode :
SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
[SDNPHasChain, SDNPSideEffect]>;
+def ProxyReg :
+ SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
let mayLoad = 1 in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
@@ -2252,6 +2253,21 @@ def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
+class ProxyRegInst<string SzStr, NVPTXRegClass regclass> :
+ NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
+ !strconcat("mov.", SzStr, " \t$dst, $src;"),
+ [(set regclass:$dst, (ProxyReg regclass:$src))]>;
+
+let isCodeGenOnly=1, isPseudo=1 in {
+ def ProxyRegI1 : ProxyRegInst<"pred", Int1Regs>;
+ def ProxyRegI16 : ProxyRegInst<"b16", Int16Regs>;
+ def ProxyRegI32 : ProxyRegInst<"b32", Int32Regs>;
+ def ProxyRegI64 : ProxyRegInst<"b64", Int64Regs>;
+ def ProxyRegF16 : ProxyRegInst<"b16", Float16Regs>;
+ def ProxyRegF32 : ProxyRegInst<"f32", Float32Regs>;
+ def ProxyRegF64 : ProxyRegInst<"f64", Float64Regs>;
+ def ProxyRegF16x2 : ProxyRegInst<"b32", Float16x2Regs>;
+}
//
// Load / Store Handling
@@ -2544,7 +2560,7 @@ let mayStore=1, hasSideEffects=0 in {
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
NVPTXRegClass regclassOut> :
NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
- !strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")),
+ !strconcat("mov.b", SzStr, " \t$d, $a;"),
[(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>;
@@ -2625,32 +2641,20 @@ def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
def : Pat<(i1 (fp_to_sint Float16Regs:$a)),
(SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
- (CVT_s16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
(CVT_s16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
- (CVT_s32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
(CVT_s32_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
- (CVT_s64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
(CVT_s64_f16 Float16Regs:$a, CvtRZI)>;
// f16 -> uint
def : Pat<(i1 (fp_to_uint Float16Regs:$a)),
(SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
- (CVT_u16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
(CVT_u16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
- (CVT_u32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
(CVT_u32_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
- (CVT_u64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
(CVT_u64_f16 Float16Regs:$a, CvtRZI)>;
// f32 -> sint
@@ -2948,14 +2952,10 @@ def : Pat<(i32 (zext (ctpop Int16Regs:$a))),
// fpround f32 -> f16
def : Pat<(f16 (fpround Float32Regs:$a)),
- (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f16 (fpround Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN)>;
// fpround f64 -> f16
def : Pat<(f16 (fpround Float64Regs:$a)),
- (CVT_f16_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f16 (fpround Float64Regs:$a)),
(CVT_f16_f64 Float64Regs:$a, CvtRN)>;
// fpround f64 -> f32
@@ -2972,8 +2972,6 @@ def : Pat<(f32 (fpextend Float16Regs:$a)),
// fpextend f16 -> f64
def : Pat<(f64 (fpextend Float16Regs:$a)),
- (CVT_f64_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f64 (fpextend Float16Regs:$a)),
(CVT_f64_f16 Float16Regs:$a, CvtNONE)>;
// fpextend f32 -> f64
@@ -2988,9 +2986,7 @@ def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
// fceil, ffloor, fround, ftrunc.
def : Pat<(fceil Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(fceil Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRPI)>;
def : Pat<(fceil Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fceil Float32Regs:$a),
@@ -2999,9 +2995,7 @@ def : Pat<(fceil Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
def : Pat<(ffloor Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(ffloor Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRMI)>;
def : Pat<(ffloor Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ffloor Float32Regs:$a),
@@ -3009,10 +3003,8 @@ def : Pat<(ffloor Float32Regs:$a),
def : Pat<(ffloor Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
-def : Pat<(fround Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f16 (fround Float16Regs:$a)),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(fround Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float32Regs:$a)),
@@ -3021,9 +3013,7 @@ def : Pat<(f64 (fround Float64Regs:$a)),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(ftrunc Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(ftrunc Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(ftrunc Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ftrunc Float32Regs:$a),
@@ -3036,9 +3026,7 @@ def : Pat<(ftrunc Float64Regs:$a),
// matches what CUDA's "libm" does.
def : Pat<(fnearbyint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(fnearbyint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(fnearbyint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fnearbyint Float32Regs:$a),
@@ -3047,9 +3035,7 @@ def : Pat<(fnearbyint Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(frint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(frint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(frint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(frint Float32Regs:$a),
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 5bb4fc3edd09..2ca0ccf2dfa7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -60,6 +61,24 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (!MI.getOperand(i).isFI())
continue;
+
+ // Frame indices in debug values are encoded in a target independent
+ // way with simply the frame index and offset rather than any
+ // target-specific addressing mode.
+ if (MI.isDebugValue()) {
+ assert(i == 0 && "Frame indices can only appear as the first "
+ "operand of a DBG_VALUE machine instruction");
+ unsigned Reg;
+ int64_t Offset =
+ TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
+ MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false);
+ MI.getOperand(0).setIsDebug();
+ auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
+ DIExpression::NoDeref, Offset);
+ MI.getOperand(3).setMetadata(DIExpr);
+ continue;
+ }
+
TRI.eliminateFrameIndex(MI, 0, i, nullptr);
Modified = true;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
new file mode 100644
index 000000000000..f60d841c1683
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
@@ -0,0 +1,122 @@
+//===- NVPTXProxyRegErasure.cpp - NVPTX Proxy Register Instruction Erasure -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The pass is needed to remove ProxyReg instructions and restore related
+// registers. The instructions were needed at instruction selection stage to
+// make sure that callseq_end nodes won't be removed as "dead nodes". This can
+// happen when we expand instructions into libcalls and the call site doesn't
+// care about the libcall chain. Call site cares about data flow only, and the
+// latest data flow node happens to be before callseq_end. Therefore the node
+// becomes dangling and "dead". The ProxyReg acts like an additional data flow
+// node *after* the callseq_end in the chain and ensures that everything will be
+// preserved.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXProxyRegErasurePass(PassRegistry &);
+}
+
+namespace {
+
+struct NVPTXProxyRegErasure : public MachineFunctionPass {
+public:
+ static char ID;
+ NVPTXProxyRegErasure() : MachineFunctionPass(ID) {
+ initializeNVPTXProxyRegErasurePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "NVPTX Proxy Register Instruction Erasure";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ void replaceMachineInstructionUsage(MachineFunction &MF, MachineInstr &MI);
+
+ void replaceRegisterUsage(MachineInstr &Instr, MachineOperand &From,
+ MachineOperand &To);
+};
+
+} // namespace
+
+char NVPTXProxyRegErasure::ID = 0;
+
+INITIALIZE_PASS(NVPTXProxyRegErasure, "nvptx-proxyreg-erasure", "NVPTX ProxyReg Erasure", false, false)
+
+bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
+ SmallVector<MachineInstr *, 16> RemoveList;
+
+ for (auto &BB : MF) {
+ for (auto &MI : BB) {
+ switch (MI.getOpcode()) {
+ case NVPTX::ProxyRegI1:
+ case NVPTX::ProxyRegI16:
+ case NVPTX::ProxyRegI32:
+ case NVPTX::ProxyRegI64:
+ case NVPTX::ProxyRegF16:
+ case NVPTX::ProxyRegF16x2:
+ case NVPTX::ProxyRegF32:
+ case NVPTX::ProxyRegF64:
+ replaceMachineInstructionUsage(MF, MI);
+ RemoveList.push_back(&MI);
+ break;
+ }
+ }
+ }
+
+ for (auto *MI : RemoveList) {
+ MI->eraseFromParent();
+ }
+
+ return !RemoveList.empty();
+}
+
+void NVPTXProxyRegErasure::replaceMachineInstructionUsage(MachineFunction &MF,
+ MachineInstr &MI) {
+ auto &InOp = *MI.uses().begin();
+ auto &OutOp = *MI.defs().begin();
+
+ assert(InOp.isReg() && "ProxyReg input operand should be a register.");
+ assert(OutOp.isReg() && "ProxyReg output operand should be a register.");
+
+ for (auto &BB : MF) {
+ for (auto &I : BB) {
+ replaceRegisterUsage(I, OutOp, InOp);
+ }
+ }
+}
+
+void NVPTXProxyRegErasure::replaceRegisterUsage(MachineInstr &Instr,
+ MachineOperand &From,
+ MachineOperand &To) {
+ for (auto &Op : Instr.uses()) {
+ if (Op.isReg() && Op.getReg() == From.getReg()) {
+ Op.setReg(To.getReg());
+ }
+ }
+}
+
+MachineFunctionPass *llvm::createNVPTXProxyRegErasurePass() {
+ return new NVPTXProxyRegErasure();
+}
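
To see the new pass end to end: each ProxyReg pseudo is a plain register copy whose only job was to keep callseq_end reachable during ISel, so the pass rewrites every use of the copy's destination to its source and then deletes the copy. A standalone model of that rewrite (plain C++, not MachineIR):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Inst {
      int Def = -1;            // register defined by this instruction, -1 if none
      std::vector<int> Uses;   // registers read by this instruction
      bool IsProxy = false;    // true for the "mov dst, src" proxy pseudo
    };

    static void eraseProxies(std::vector<Inst> &Prog) {
      // Rewrite reads of each proxy's destination to read its source instead.
      for (const Inst &P : Prog) {
        if (!P.IsProxy)
          continue;
        for (Inst &I : Prog)
          for (int &U : I.Uses)
            if (U == P.Def)
              U = P.Uses.front();
      }
      // Drop the now-unused proxy copies.
      Prog.erase(std::remove_if(Prog.begin(), Prog.end(),
                                [](const Inst &I) { return I.IsProxy; }),
                 Prog.end());
    }

    int main() {
      // call defines %1; the proxy copies %1 into %2; a later use reads %2.
      std::vector<Inst> Prog = {{1, {}, false}, {2, {1}, true}, {-1, {2}, false}};
      eraseProxies(Prog);
      // After erasure the use reads %1 directly and the copy is gone.
      assert(Prog.size() == 2 && Prog.back().Uses.front() == 1);
    }
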
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a1b160441df3..8ec0ddb9b3d5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -68,6 +68,7 @@ void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
+void initializeNVPTXProxyRegErasurePass(PassRegistry &);
} // end namespace llvm
@@ -87,6 +88,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
+ initializeNVPTXProxyRegErasurePass(PR);
}
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
@@ -102,12 +104,6 @@ static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
return Ret;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -118,7 +114,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
// specified, as it is the only relocation model currently supported.
: LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
CPU, FS, Options, Reloc::PIC_,
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
@@ -166,6 +162,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
+ void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addMachineSSAOptimization() override;
@@ -195,7 +192,7 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.addExtension(
PassManagerBuilder::EP_EarlyAsPossible,
[&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- PM.add(createNVVMReflectPass());
+ PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
});
}
@@ -258,7 +255,8 @@ void NVPTXPassConfig::addIRPasses() {
// it here does nothing. But since we need it for correctness when lowering
// to NVPTX, run it here too, in case whoever built our pass pipeline didn't
// call addEarlyAsPossiblePasses.
- addPass(createNVVMReflectPass());
+ const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
+ addPass(createNVVMReflectPass(ST.getSmVersion()));
if (getOptLevel() != CodeGenOpt::None)
addPass(createNVPTXImageOptimizerPass());
@@ -306,6 +304,11 @@ bool NVPTXPassConfig::addInstSelector() {
return false;
}
+void NVPTXPassConfig::addPreRegAlloc() {
+ // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
+ addPass(createNVPTXProxyRegErasurePass());
+}
+
void NVPTXPassConfig::addPostRegAlloc() {
addPass(createNVPTXPrologEpilogPass(), false);
if (getOptLevel() != CodeGenOpt::None) {
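
The constructor change drops the file-local getEffectiveCodeModel helper in favor of the shared overload that takes an explicit default. The semantics this call relies on boil down to "use the requested code model if one was given, otherwise Small"; a standalone sketch of that fallback:

    #include <cassert>
    #include <optional>

    enum class Model { Small, Kernel, Medium, Large };

    // Mirror of the fallback the shared helper provides via its Default argument.
    static Model effectiveCodeModel(std::optional<Model> Requested, Model Default) {
      return Requested ? *Requested : Default;
    }

    int main() {
      assert(effectiveCodeModel(std::nullopt, Model::Small) == Model::Small);
      assert(effectiveCodeModel(Model::Large, Model::Small) == Model::Large);
    }
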
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index a631055d36a0..14e93f7447dd 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -49,6 +49,19 @@ public:
return AddressSpace::ADDRESS_SPACE_GENERIC;
}
+ // Loads and stores can be vectorized if the alignment is at least as big as
+ // the load/store we want to vectorize.
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return Alignment >= ChainSizeInBytes;
+ }
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
+ }
+
// NVPTX has infinite registers of all kinds, but the actual machine doesn't.
// We conservatively return 1 here which is just enough to enable the
// vectorizers but disables heuristics based on the number of registers.
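
The new TTI hooks state the rule directly: a load or store chain may be vectorized only when its alignment is at least the size of the whole chain. A standalone check of that rule with two concrete chains:

    #include <cassert>

    // Same predicate as the hook above, minus the unused address-space argument.
    static bool isLegalToVectorizeChain(unsigned ChainSizeInBytes, unsigned Alignment) {
      return Alignment >= ChainSizeInBytes;
    }

    int main() {
      assert(isLegalToVectorizeChain(8, 8));    // two 4-byte loads, 8-byte aligned: legal
      assert(!isLegalToVectorizeChain(16, 8));  // 16-byte chain, only 8-byte aligned: rejected
    }
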
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 60971b48adfc..64c262664fda 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -50,7 +50,9 @@ namespace {
class NVVMReflect : public FunctionPass {
public:
static char ID;
- NVVMReflect() : FunctionPass(ID) {
+ unsigned int SmVersion;
+ NVVMReflect() : NVVMReflect(0) {}
+ explicit NVVMReflect(unsigned int Sm) : FunctionPass(ID), SmVersion(Sm) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
}
@@ -58,7 +60,9 @@ public:
};
}
-FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
+FunctionPass *llvm::createNVVMReflectPass(unsigned int SmVersion) {
+ return new NVVMReflect(SmVersion);
+}
static cl::opt<bool>
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
@@ -163,6 +167,8 @@ bool NVVMReflect::runOnFunction(Function &F) {
if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
ReflectVal = Flag->getSExtValue();
+ } else if (ReflectArg == "__CUDA_ARCH") {
+ ReflectVal = SmVersion * 10;
}
Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
ToRemove.push_back(Call);
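
With SmVersion now threaded into NVVMReflect, reflect calls whose argument is "__CUDA_ARCH" constant-fold to SmVersion * 10 (750 when compiling for sm_75), letting arch-dependent branches in device libraries fold away. A standalone model of the folding rule (plain C++, not the pass itself; the nvvm-reflect-ftz path shown above is left out):

    #include <cassert>
    #include <cstring>

    // Fold rule the pass applies based on the reflect argument string.
    static int reflectValue(const char *Arg, unsigned SmVersion) {
      if (std::strcmp(Arg, "__CUDA_ARCH") == 0)
        return static_cast<int>(SmVersion * 10);   // sm_75 -> 750
      return 0;                                    // default for unrecognized strings
    }

    int main() {
      assert(reflectValue("__CUDA_ARCH", 75) == 750);
      assert(reflectValue("__CUDA_ARCH", 70) == 700);
    }
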
diff --git a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp b/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp
deleted file mode 100644
index de0a5f9e84ea..000000000000
--- a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- Nios2InstPrinter.cpp - Convert Nios2 MCInst to assembly syntax-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an Nios2 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2InstPrinter.h"
-
-#include "Nios2InstrInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#define PRINT_ALIAS_INSTR
-#include "Nios2GenAsmWriter.inc"
-
-void Nios2InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
-}
-
-void Nios2InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
- // Try to print any aliases first.
- if (!printAliasInstr(MI, STI, O))
- printInstruction(MI, STI, O);
- printAnnotation(O, Annot);
-}
-
-void Nios2InstPrinter::printOperand(const MCInst *MI, int OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- printRegName(O, Op.getReg());
- return;
- }
-
- if (Op.isImm()) {
- O << Op.getImm();
- return;
- }
-
- assert(Op.isExpr() && "unknown operand kind in printOperand");
- Op.getExpr()->print(O, &MAI, true);
-}
-
-void Nios2InstPrinter::printMemOperand(const MCInst *MI, int opNum,
- const MCSubtargetInfo &STI,
- raw_ostream &O, const char *Modifier) {
- // Load/Store memory operands -- imm($reg)
- printOperand(MI, opNum + 1, STI, O);
- O << "(";
- printOperand(MI, opNum, STI, O);
- O << ")";
-}
diff --git a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h b/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h
deleted file mode 100644
index 43a12951baea..000000000000
--- a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//= Nios2InstPrinter.h - Convert Nios2 MCInst to assembly syntax -*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a Nios2 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_INSTPRINTER_NIOS2INSTPRINTER_H
-#define LLVM_LIB_TARGET_NIOS2_INSTPRINTER_NIOS2INSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class Nios2InstPrinter : public MCInstPrinter {
-public:
- Nios2InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
-
- void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
-
- // Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
- bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
-
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx,
- const MCSubtargetInfo &STI, raw_ostream &O);
- void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
- raw_ostream &OS);
- void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
- raw_ostream &OS, const char *Modifier = nullptr);
-};
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp
deleted file mode 100644
index 8ac08c6837d9..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-//===-- Nios2AsmBackend.cpp - Nios2 Asm Backend --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Nios2AsmBackend class.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "MCTargetDesc/Nios2AsmBackend.h"
-#include "MCTargetDesc/Nios2FixupKinds.h"
-#include "MCTargetDesc/Nios2MCTargetDesc.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-
-using namespace llvm;
-
-// Prepare value for the target space for it
-static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value) {
-
- unsigned Kind = Fixup.getKind();
-
- // Add/subtract and shift
- switch (Kind) {
- default:
- return 0;
- case Nios2::fixup_Nios2_LO16:
- break;
- case Nios2::fixup_Nios2_HI16:
- // Get the higher 16-bits. Also add 1 if bit 15 is 1.
- Value = ((Value + 0x8000) >> 16) & 0xffff;
- break;
- }
-
- return Value;
-}
-
-// Calculate index for Nios2 specific little endian byte order
-static unsigned calculateLEIndex(unsigned i) {
- assert(i <= 3 && "Index out of range!");
-
- return (1 - i / 2) * 2 + i % 2;
-}
-
-/// ApplyFixup - Apply the \p Value for given \p Fixup into the provided
-/// data fragment, at the offset specified by the fixup and following the
-/// fixup kind as appropriate.
-void Nios2AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) const {
- MCFixupKind Kind = Fixup.getKind();
- Value = adjustFixupValue(Fixup, Value);
-
- if (!Value)
- return; // Doesn't change encoding.
-
- // Where do we start in the object
- unsigned Offset = Fixup.getOffset();
- // Number of bytes we need to fixup
- unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
- // Grab current value, if any, from bits.
- uint64_t CurVal = 0;
-
- for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = calculateLEIndex(i);
- CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i * 8);
- }
-
- uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
- CurVal |= Value & Mask;
-
- // Write out the fixed up bytes back to the code/data bits.
- for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = calculateLEIndex(i);
- Data[Offset + Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff);
- }
-}
-
-Optional<MCFixupKind> Nios2AsmBackend::getFixupKind(StringRef Name) const {
- return StringSwitch<Optional<MCFixupKind>>(Name)
- .Case("R_NIOS2_NONE", (MCFixupKind)Nios2::fixup_Nios2_32)
- .Case("R_NIOS2_32", FK_Data_4)
- .Default(MCAsmBackend::getFixupKind(Name));
-}
-
-//@getFixupKindInfo {
-const MCFixupKindInfo &
-Nios2AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[Nios2::NumTargetFixupKinds] = {
- // This table *must* be in same the order of fixup_* kinds in
- // Nios2FixupKinds.h.
- //
- // name offset bits flags
- {"fixup_Nios2_32", 0, 32, 0},
- {"fixup_Nios2_HI16", 0, 16, 0},
- {"fixup_Nios2_LO16", 0, 16, 0}};
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
-}
-
-std::unique_ptr<MCObjectTargetWriter>
-Nios2AsmBackend::createObjectTargetWriter() const {
- return createNios2ELFObjectWriter(MCELFObjectTargetWriter::getOSABI(OSType));
-}
-
-bool Nios2AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
- return true;
-}
-
-// MCAsmBackend
-MCAsmBackend *llvm::createNios2AsmBackend(const Target &T,
- const MCSubtargetInfo &STI,
- const MCRegisterInfo &MRI,
- const MCTargetOptions &Options) {
- return new Nios2AsmBackend(T, STI.getTargetTriple().getOS());
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h
deleted file mode 100644
index 1f114bd869b1..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h
+++ /dev/null
@@ -1,81 +0,0 @@
-//===-- Nios2AsmBackend.h - Nios2 Asm Backend ----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Nios2AsmBackend class.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2ASMBACKEND_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2ASMBACKEND_H
-
-#include "MCTargetDesc/Nios2FixupKinds.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAsmBackend.h"
-
-namespace llvm {
-
-class MCAssembler;
-struct MCFixupKindInfo;
-class Target;
-class MCObjectWriter;
-
-class Nios2AsmBackend : public MCAsmBackend {
- Triple::OSType OSType;
-
-public:
- Nios2AsmBackend(const Target &T, Triple::OSType OSType)
- : MCAsmBackend(support::little), OSType(OSType) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override;
-
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
-
- void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsResolved) const override;
-
- Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
-
- unsigned getNumFixupKinds() const override {
- return Nios2::NumTargetFixupKinds;
- }
-
- /// MayNeedRelaxation - Check whether the given instruction may need
- /// relaxation.
- ///
- /// \param Inst - The instruction to test.
- bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
-
- /// fixupNeedsRelaxation - Target specific predicate for whether a given
- /// fixup requires the associated instruction to be relaxed.
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
- // FIXME.
- llvm_unreachable("RelaxInstruction() unimplemented");
- return false;
- }
-
- /// RelaxInstruction - Relax the instruction in the given fragment
- /// to the next wider instruction.
- ///
- /// \param Inst - The instruction to relax, which may be the same
- /// as the output.
- /// \param [out] Res On return, the relaxed instruction.
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {}
-
-}; // class Nios2AsmBackend
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h
deleted file mode 100644
index 225671ebc8d8..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- Nios2BaseInfo.h - Top level definitions for NIOS2 MC ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the Nios2 target useful for the compiler back-end and the MC libraries.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2BASEINFO_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2BASEINFO_H
-
-namespace llvm {
-
-/// Nios2FG - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-namespace Nios2FG {
-/// Target Operand Flag enum.
-enum TOF {
- //===------------------------------------------------------------------===//
- // Nios2 Specific MachineOperand flags.
-
- MO_NO_FLAG,
-
- /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
- /// address.
- MO_ABS_HI,
- MO_ABS_LO,
-
-};
-} // namespace Nios2FG
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp
deleted file mode 100644
index db432d15120d..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- Nios2ELFObjectWriter.cpp - Nios2 ELF Writer -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/Nios2FixupKinds.h"
-#include "MCTargetDesc/Nios2MCExpr.h"
-#include "MCTargetDesc/Nios2MCTargetDesc.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCObjectWriter.h"
-
-using namespace llvm;
-
-namespace {
-class Nios2ELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- Nios2ELFObjectWriter(uint8_t OSABI)
- : MCELFObjectTargetWriter(false, OSABI, ELF::EM_ALTERA_NIOS2, false) {}
-
- ~Nios2ELFObjectWriter() override;
-
- unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
- const MCFixup &Fixup, bool IsPCRel) const override;
-};
-} // namespace
-
-Nios2ELFObjectWriter::~Nios2ELFObjectWriter() {}
-
-unsigned Nios2ELFObjectWriter::getRelocType(MCContext &Ctx,
- const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- return 0;
-}
-
-std::unique_ptr<MCObjectTargetWriter>
-llvm::createNios2ELFObjectWriter(uint8_t OSABI) {
- return llvm::make_unique<Nios2ELFObjectWriter>(OSABI);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h
deleted file mode 100644
index c169a1b19371..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===-- Nios2FixupKinds.h - Nios2 Specific Fixup Entries --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2FIXUPKINDS_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2FIXUPKINDS_H
-
-#include "llvm/MC/MCFixup.h"
-
-namespace llvm {
-namespace Nios2 {
-// Although most of the current fixup types reflect a unique relocation
-// one can have multiple fixup types for a given relocation and thus need
-// to be uniquely named.
-//
-// This table *must* be in the save order of
-// MCFixupKindInfo Infos[Nios2::NumTargetFixupKinds]
-// in Nios2AsmBackend.cpp.
-enum Fixups {
- // Pure upper 32 bit fixup resulting in - R_NIOS2_32.
- fixup_Nios2_32 = FirstTargetFixupKind,
-
- // Pure upper 16 bit fixup resulting in - R_NIOS2_HI16.
- fixup_Nios2_HI16,
-
- // Pure lower 16 bit fixup resulting in - R_NIOS2_LO16.
- fixup_Nios2_LO16,
-
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
-};
-} // namespace Nios2
-} // namespace llvm
-
-#endif // LLVM_NIOS2_NIOS2FIXUPKINDS_H
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp
deleted file mode 100644
index e3c66e6776c2..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//===-- Nios2MCAsmInfo.cpp - Nios2 Asm Properties -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the Nios2MCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2MCAsmInfo.h"
-
-#include "llvm/ADT/Triple.h"
-
-using namespace llvm;
-
-void Nios2MCAsmInfo::anchor() {}
-
-Nios2MCAsmInfo::Nios2MCAsmInfo(const Triple &TheTriple) {
- if ((TheTriple.getArch() == Triple::nios2))
- IsLittleEndian = true; // the default of IsLittleEndian is true
-
- AlignmentIsInBytes = false;
- Data16bitsDirective = "\t.2byte\t";
- Data32bitsDirective = "\t.4byte\t";
- Data64bitsDirective = "\t.8byte\t";
- PrivateLabelPrefix = ".LC";
- CommentString = "#";
- ZeroDirective = "\t.space\t";
- GPRel32Directive = "\t.gpword\t";
- GPRel64Directive = "\t.gpdword\t";
- WeakRefDirective = "\t.weak\t";
- GlobalDirective = "\t.global\t";
- AscizDirective = "\t.string\t";
- UseAssignmentForEHBegin = true;
-
- SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::DwarfCFI;
- DwarfRegNumForCFI = true;
- UsesELFSectionDirectiveForBSS = true;
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h
deleted file mode 100644
index 0c81276f84d8..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- Nios2MCAsmInfo.h - Nios2 Asm Info ----------------------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the Nios2MCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCASMINFO_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCASMINFO_H
-
-#include "llvm/MC/MCAsmInfoELF.h"
-
-namespace llvm {
-class Triple;
-
-class Nios2MCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
-
-public:
- explicit Nios2MCAsmInfo(const Triple &TheTriple);
-};
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp
deleted file mode 100644
index 0f12c9e93378..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-//===-- Nios2MCExpr.cpp - Nios2 specific MC expression classes ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2.h"
-
-#include "Nios2MCExpr.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSymbolELF.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2mcexpr"
-
-const Nios2MCExpr *Nios2MCExpr::create(Nios2MCExpr::Nios2ExprKind Kind,
- const MCExpr *Expr, MCContext &Ctx) {
- return new (Ctx) Nios2MCExpr(Kind, Expr);
-}
-
-const Nios2MCExpr *Nios2MCExpr::create(const MCSymbol *Symbol,
- Nios2MCExpr::Nios2ExprKind Kind,
- MCContext &Ctx) {
- const MCSymbolRefExpr *MCSym =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
- return new (Ctx) Nios2MCExpr(Kind, MCSym);
-}
-
-void Nios2MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
-
- switch (Kind) {
- case CEK_None:
- case CEK_Special:
- llvm_unreachable("CEK_None and CEK_Special are invalid");
- break;
- case CEK_ABS_HI:
- OS << "%hiadj";
- break;
- case CEK_ABS_LO:
- OS << "%lo";
- break;
- }
-
- OS << '(';
- Expr->print(OS, MAI, true);
- OS << ')';
-}
-
-bool Nios2MCExpr::evaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout,
- const MCFixup *Fixup) const {
- return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
-}
-
-void Nios2MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
- Streamer.visitUsedExpr(*getSubExpr());
-}
-
-void Nios2MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
- switch (getKind()) {
- case CEK_None:
- case CEK_Special:
- llvm_unreachable("CEK_None and CEK_Special are invalid");
- break;
- case CEK_ABS_HI:
- case CEK_ABS_LO:
- break;
- }
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h
deleted file mode 100644
index 5b49005eb648..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Nios2MCExpr.h - Nios2 specific MC expression classes ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCEXPR_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCEXPR_H
-
-#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCValue.h"
-
-namespace llvm {
-
-class Nios2MCExpr : public MCTargetExpr {
-public:
- enum Nios2ExprKind {
- CEK_None,
- CEK_ABS_HI,
- CEK_ABS_LO,
- CEK_Special,
- };
-
-private:
- const Nios2ExprKind Kind;
- const MCExpr *Expr;
-
- explicit Nios2MCExpr(Nios2ExprKind Kind, const MCExpr *Expr)
- : Kind(Kind), Expr(Expr) {}
-
-public:
- static const Nios2MCExpr *create(Nios2ExprKind Kind, const MCExpr *Expr,
- MCContext &Ctx);
- static const Nios2MCExpr *create(const MCSymbol *Symbol,
- Nios2MCExpr::Nios2ExprKind Kind,
- MCContext &Ctx);
-
- /// Get the kind of this expression.
- Nios2ExprKind getKind() const { return Kind; }
-
- /// Get the child of this expression.
- const MCExpr *getSubExpr() const { return Expr; }
-
- void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
- bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
- const MCFixup *Fixup) const override;
- void visitUsedExpr(MCStreamer &Streamer) const override;
- MCFragment *findAssociatedFragment() const override {
- return getSubExpr()->findAssociatedFragment();
- }
-
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
-};
-} // end namespace llvm
-
-#endif
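For reference, a minimal sketch of how the create() factories above and printImpl() from the preceding hunk fit together. This is not part of the patch; it assumes the deleted Nios2MCExpr.h were still on the include path, and Sym and Ctx come from the surrounding MC layer:

#include "Nios2MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"

// Builds an operand expression that printImpl() renders as "%hiadj(sym)".
static const llvm::MCExpr *makeHiAdj(const llvm::MCSymbol *Sym,
                                     llvm::MCContext &Ctx) {
  using namespace llvm;
  const MCExpr *Ref =
      MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
  // CEK_ABS_HI selects the "%hiadj" modifier; CEK_ABS_LO would give "%lo".
  return Nios2MCExpr::create(Nios2MCExpr::CEK_ABS_HI, Ref, Ctx);
}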
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp
deleted file mode 100644
index e57b44d3cfdc..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-//===-- Nios2MCTargetDesc.cpp - Nios2 Target Descriptions -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Nios2 specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2MCTargetDesc.h"
-#include "InstPrinter/Nios2InstPrinter.h"
-#include "Nios2MCAsmInfo.h"
-#include "Nios2TargetStreamer.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-#define GET_INSTRINFO_MC_DESC
-#include "Nios2GenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "Nios2GenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "Nios2GenRegisterInfo.inc"
-
-static MCInstrInfo *createNios2MCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitNios2MCInstrInfo(X); // defined in Nios2GenInstrInfo.inc
- return X;
-}
-
-static MCRegisterInfo *createNios2MCRegisterInfo(const Triple &TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitNios2MCRegisterInfo(X, Nios2::R15); // defined in Nios2GenRegisterInfo.inc
- return X;
-}
-
-static MCSubtargetInfo *
-createNios2MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- if (CPU.empty() || CPU == "generic")
- CPU = "nios2r1";
- return createNios2MCSubtargetInfoImpl(TT, CPU, FS);
- // createNios2MCSubtargetInfoImpl defined in Nios2GenSubtargetInfo.inc
-}
-
-static MCAsmInfo *createNios2MCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
- MCAsmInfo *MAI = new Nios2MCAsmInfo(TT);
-
- unsigned SP = MRI.getDwarfRegNum(Nios2::SP, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0);
- MAI->addInitialFrameState(Inst);
-
- return MAI;
-}
-
-static MCInstPrinter *createNios2MCInstPrinter(const Triple &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI) {
- return new Nios2InstPrinter(MAI, MII, MRI);
-}
-
-static MCTargetStreamer *createNios2AsmTargetStreamer(MCStreamer &S,
- formatted_raw_ostream &OS,
- MCInstPrinter *InstPrint,
- bool isVerboseAsm) {
- return new Nios2TargetAsmStreamer(S, OS);
-}
-
-extern "C" void LLVMInitializeNios2TargetMC() {
- Target *T = &getTheNios2Target();
-
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(*T, createNios2MCAsmInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(*T, createNios2MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(*T, createNios2MCRegisterInfo);
-
- // Register the asm target streamer.
- TargetRegistry::RegisterAsmTargetStreamer(*T, createNios2AsmTargetStreamer);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(*T, createNios2MCSubtargetInfo);
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(*T, createNios2MCInstPrinter);
-
- // Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(*T, createNios2AsmBackend);
-}
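For reference, a sketch (an assumption, not taken from this patch) of how a generic tool reaches the factories registered by LLVMInitializeNios2TargetMC() above, going through the TargetRegistry rather than calling them directly:

#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/TargetRegistry.h"
#include <memory>
#include <string>

// Looks the target up by name and pulls one of the MC pieces registered above.
// Assumes the Nios2 target was linked in and "nios2" parses as a target triple.
static std::unique_ptr<llvm::MCInstrInfo> getNios2InstrInfo() {
  std::string Err;
  const llvm::Target *T = llvm::TargetRegistry::lookupTarget("nios2", Err);
  if (!T)
    return nullptr; // target not registered
  // Dispatches to createNios2MCInstrInfo() via RegisterMCInstrInfo above.
  return std::unique_ptr<llvm::MCInstrInfo>(T->createMCInstrInfo());
}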
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h
deleted file mode 100644
index a7c4b16c6a3b..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- Nios2MCTargetDesc.h - Nios2 Target Descriptions ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Nios2 specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H
-
-#include <memory>
-
-namespace llvm {
-class MCAsmBackend;
-class MCObjectTargetWriter;
-class MCRegisterInfo;
-class MCSubtargetInfo;
-class MCTargetOptions;
-class Target;
-class Triple;
-class StringRef;
-class raw_pwrite_stream;
-
-Target &getTheNios2Target();
-
-MCAsmBackend *createNios2AsmBackend(const Target &T, const MCSubtargetInfo &STI,
- const MCRegisterInfo &MRI,
- const MCTargetOptions &Options);
-
-std::unique_ptr<MCObjectTargetWriter> createNios2ELFObjectWriter(uint8_t OSABI);
-
-} // namespace llvm
-
-// Defines symbolic names for Nios2 registers. This defines a mapping from
-// register name to register number.
-#define GET_REGINFO_ENUM
-#include "Nios2GenRegisterInfo.inc"
-
-// Defines symbolic names for the Nios2 instructions.
-#define GET_INSTRINFO_ENUM
-#include "Nios2GenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "Nios2GenSubtargetInfo.inc"
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp
deleted file mode 100644
index 795fd0084aa3..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- Nios2TargetStreamer.cpp - Nios2 Target Streamer Methods -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Nios2 specific target streamer methods.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2TargetStreamer.h"
-
-using namespace llvm;
-
-Nios2TargetStreamer::Nios2TargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
-
-Nios2TargetAsmStreamer::Nios2TargetAsmStreamer(MCStreamer &S,
- formatted_raw_ostream &OS)
- : Nios2TargetStreamer(S) {}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2.h b/contrib/llvm/lib/Target/Nios2/Nios2.h
deleted file mode 100644
index d6c5c1e49662..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===-- Nios2.h - Top-level interface for Nios2 representation --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in
-// the LLVM Nios2 back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2_H
-
-#include "MCTargetDesc/Nios2MCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-class FunctionPass;
-class formatted_raw_ostream;
-class Nios2TargetMachine;
-class AsmPrinter;
-class MachineInstr;
-class MCInst;
-
-FunctionPass *createNios2ISelDag(Nios2TargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-void LowerNios2MachineInstToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP);
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2.td b/contrib/llvm/lib/Target/Nios2/Nios2.td
deleted file mode 100644
index 1acf4c70c42c..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2.td
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- Nios2.td - Describe the Nios2 Target Machine -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Calling Conv, Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-include "Nios2RegisterInfo.td"
-include "Nios2Schedule.td"
-include "Nios2InstrInfo.td"
-include "Nios2CallingConv.td"
-
-//===----------------------------------------------------------------------===//
-// Nios2 Subtarget features
-//===----------------------------------------------------------------------===//
-def FeatureNios2r1 : SubtargetFeature<"nios2r1", "Nios2ArchVersion",
- "Nios2r1", "Nios2 R1 ISA Support">;
-def FeatureNios2r2 : SubtargetFeature<"nios2r2", "Nios2ArchVersion",
- "Nios2r2", "Nios2 R2 ISA Support">;
-
-//===----------------------------------------------------------------------===//
-// Nios2 processors supported.
-//===----------------------------------------------------------------------===//
-
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, Nios2GenericItineraries, Features>;
-
-def : Proc<"nios2r1", [FeatureNios2r1]>;
-def : Proc<"nios2r2", [FeatureNios2r2]>;
-
-def Nios2InstrInfo : InstrInfo;
-
-def Nios2AsmParser : AsmParser {
- let ShouldEmitMatchRegisterName = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Declare the target which we are implementing
-//===----------------------------------------------------------------------===//
-
-def Nios2AsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
- int PassSubtarget = 1;
- int Variant = 0;
-}
-
-def Nios2 : Target {
-// Uses the Nios2InstrInfo record defined above.
- let InstructionSet = Nios2InstrInfo;
- let AssemblyParsers = [Nios2AsmParser];
- let AssemblyWriters = [Nios2AsmWriter];
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp b/contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp
deleted file mode 100644
index 1abf19591774..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===-- Nios2AsmPrinter.cpp - Nios2 LLVM Assembly Printer -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format NIOS2 assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#include "InstPrinter/Nios2InstPrinter.h"
-#include "MCTargetDesc/Nios2BaseInfo.h"
-#include "Nios2.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2-asm-printer"
-
-namespace {
-
-class Nios2AsmPrinter : public AsmPrinter {
-
-public:
- explicit Nios2AsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
-
- StringRef getPassName() const override { return "Nios2 Assembly Printer"; }
-
- //- EmitInstruction() must exist or there will be a run-time error.
- void EmitInstruction(const MachineInstr *MI) override;
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void EmitFunctionEntryLabel() override;
-};
-} // namespace
-
-//- EmitInstruction() must exist or there will be a run-time error.
-void Nios2AsmPrinter::EmitInstruction(const MachineInstr *MI) {
-
- // Print out both the ordinary instruction and any bundled instructions.
- MachineBasicBlock::const_instr_iterator I = MI->getIterator();
- MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
-
- do {
-
- if (I->isPseudo()) {
- llvm_unreachable("Pseudo opcode found in EmitInstruction()");
- }
-
- MCInst TmpInst0;
- LowerNios2MachineInstToMCInst(&*I, TmpInst0, *this);
- EmitToStreamer(*OutStreamer, TmpInst0);
- } while ((++I != E) && I->isInsideBundle()); // Delay slot check
-}
-
-// Emit the function entry label, e.g.:
-//   .type main,@function
-// main:
-void Nios2AsmPrinter::EmitFunctionEntryLabel() {
- OutStreamer->EmitLabel(CurrentFnSym);
-}
-
-// Print out an operand for an inline asm expression.
-bool Nios2AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- printOperand(MI, OpNum, O);
- return false;
-}
-
-bool Nios2AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNum, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier
-
- const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isReg() && "unexpected inline asm memory operand");
- O << "($" << Nios2InstPrinter::getRegisterName(MO.getReg()) << ")";
-
- return false;
-}
-
-void Nios2AsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(opNum);
- bool closeP = false;
-
- if (MO.getTargetFlags())
- closeP = true;
-
- switch (MO.getTargetFlags()) {
- case Nios2FG::MO_ABS_HI:
- O << "%hiadj(";
- break;
- case Nios2FG::MO_ABS_LO:
- O << "%lo(";
- break;
- }
-
- switch (MO.getType()) {
- case MachineOperand::MO_Register:
- O << '$'
- << StringRef(Nios2InstPrinter::getRegisterName(MO.getReg())).lower();
- break;
-
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- break;
-
- case MachineOperand::MO_MachineBasicBlock:
- MO.getMBB()->getSymbol()->print(O, MAI);
- return;
-
- case MachineOperand::MO_GlobalAddress:
- getSymbol(MO.getGlobal())->print(O, MAI);
- break;
-
- case MachineOperand::MO_BlockAddress:
- O << GetBlockAddressSymbol(MO.getBlockAddress())->getName();
- break;
-
- case MachineOperand::MO_ExternalSymbol:
- O << MO.getSymbolName();
- break;
-
- default:
- llvm_unreachable("<unknown operand type>");
- }
-
- if (closeP)
- O << ")";
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeNios2AsmPrinter() {
- RegisterAsmPrinter<Nios2AsmPrinter> X(getTheNios2Target());
-}
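For reference, a worked example (the symbol name is hypothetical and the instruction pair is an assumption) of how the MO_ABS_HI / MO_ABS_LO cases in printOperand() above show up in the emitted assembly:

// For a global address operand "x":
//   MO_ABS_HI prints as  %hiadj(x)
//   MO_ABS_LO prints as  %lo(x)
// so a 32-bit address is typically materialized by a pair along the lines of
//   movhi r2, %hiadj(x)
//   addi  r2, r2, %lo(x)
// Only the operand syntax comes from printOperand(); the surrounding
// instructions are illustrative.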
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td b/contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td
deleted file mode 100644
index f0b172f8422d..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td
+++ /dev/null
@@ -1,34 +0,0 @@
-//===- Nios2CallingConv.td - Calling Conventions for Nios2 -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This describes the calling conventions for Nios2 architecture.
-//===----------------------------------------------------------------------===//
-
-/// CCIfSubtarget - Match if the current subtarget has a feature F.
-class CCIfSubtarget<string F, CCAction A>:
- CCIf<!strconcat("State.getTarget().getSubtarget<Nios2Subtarget>().", F), A>;
-
-def CC_Nios2 : CallingConv<[
- // i32 and f32 arguments are passed in integer registers if there is space.
- CCIfType<[i32, f32], CCAssignToReg<[R4, R5, R6, R7]>>,
-
- // Alternatively, they are assigned to the stack in 4-byte aligned units.
- CCAssignToStack<4, 4>
-]>;
-
-def RetCC_Nios2EABI : CallingConv<[
- // i32 values are returned in registers R2 and R3.
- CCIfType<[i32], CCAssignToReg<[R2, R3]>>,
- // Floating-point values (FPH2 instructions) use the same register set.
- CCIfType<[f32], CCAssignToReg<[R2, R3]>>,
- CCIfByVal<CCPassByVal<4, 4>>,
- // Stack parameter slots for i32 are 32-bit words, 4-byte aligned.
- CCIfType<[i32], CCAssignToStack<4, 4>>
-]>;
-
-def CSR : CalleeSavedRegs<(add RA, FP, (sequence "R%u", 16, 23))>;
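For reference, a worked example of what CC_Nios2, RetCC_Nios2EABI and CSR above imply for a simple call (the signature is made up for illustration):

// int callee(int a, int b, int c, int d, int e);    // hypothetical signature
//
//   a, b, c, d   -> R4, R5, R6, R7                  (CCAssignToReg)
//   e            -> first 4-byte-aligned stack slot (CCAssignToStack<4, 4>)
//   return value -> R2; R3 would carry the second i32 of a 64-bit result
//   preserved across the call: RA, FP, R16..R23     (CSR)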
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp b/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp
deleted file mode 100644
index 6fb28a6fd638..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- Nios2FrameLowering.cpp - Nios2 Frame Information ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2FrameLowering.h"
-
-#include "Nios2Subtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
-
-using namespace llvm;
-
-bool Nios2FrameLowering::hasFP(const MachineFunction &MF) const { return true; }
-
-void Nios2FrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
-
-void Nios2FrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h b/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h
deleted file mode 100644
index 4ffb01dda36a..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h
+++ /dev/null
@@ -1,39 +0,0 @@
-//===-- Nios2FrameLowering.h - Define frame lowering for Nios2 --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2FRAMELOWERING_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2FRAMELOWERING_H
-
-#include "Nios2.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
-
-namespace llvm {
-class Nios2Subtarget;
-
-class Nios2FrameLowering : public TargetFrameLowering {
-protected:
- const Nios2Subtarget &STI;
-
-public:
- explicit Nios2FrameLowering(const Nios2Subtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0, 4),
- STI(sti) {}
-
- bool hasFP(const MachineFunction &MF) const override;
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp
deleted file mode 100644
index 5f9679466115..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-//===-- Nios2ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Nios2 ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the NIOS2 target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2-isel"
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Nios2DAGToDAGISel - NIOS2 specific code to select NIOS2 machine
-// instructions for SelectionDAG operations.
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-class Nios2DAGToDAGISel : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the Nios2 Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const Nios2Subtarget *Subtarget;
-
-public:
- explicit Nios2DAGToDAGISel(Nios2TargetMachine &TM, CodeGenOpt::Level OL)
- : SelectionDAGISel(TM, OL) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- Subtarget = &MF.getSubtarget<Nios2Subtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
- }
-
- void Select(SDNode *N) override;
-
- // Pass Name
- StringRef getPassName() const override {
- return "NIOS2 DAG->DAG Pattern Instruction Selection";
- }
-
-#include "Nios2GenDAGISel.inc"
-};
-} // namespace
-
-// Select instructions that have not been custom-lowered. Used for
-// expanded, promoted and normal instructions.
-void Nios2DAGToDAGISel::Select(SDNode *Node) {
-
- // If we have a custom node, we already have selected!
- if (Node->isMachineOpcode()) {
- LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
- Node->setNodeId(-1);
- return;
- }
-
- // Select the default instruction
- SelectCode(Node);
-}
-
-FunctionPass *llvm::createNios2ISelDag(Nios2TargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new Nios2DAGToDAGISel(TM, OptLevel);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp b/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp
deleted file mode 100644
index 008ce1570722..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-//===-- Nios2ISelLowering.cpp - Nios2 DAG Lowering Implementation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the interfaces that Nios2 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2ISelLowering.h"
-#include "Nios2MachineFunction.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "Nios2GenCallingConv.inc"
-
-SDValue
-Nios2TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool IsVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SDLoc &DL, SelectionDAG &DAG) const {
- // CCValAssign - represents the assignment of
- // the return value to a location
- SmallVector<CCValAssign, 16> RVLocs;
- MachineFunction &MF = DAG.getMachineFunction();
-
- // CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
- // Analyze return values.
- CCInfo.CheckReturn(Outs, RetCC_Nios2EABI);
-
- SDValue Flag;
- SmallVector<SDValue, 4> RetOps(1, Chain);
-
- // Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- SDValue Val = OutVals[i];
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
-
- if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
- Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val);
-
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
-
- // Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
- }
-
- if (Flag.getNode())
- RetOps.push_back(Flag);
-
- return DAG.getNode(Nios2ISD::Ret, DL, MVT::Other, RetOps);
-}
-
-// addLiveIn - This helper function adds the specified physical register to the
-// MachineFunction as a live in value. It also creates a corresponding
-// virtual register for it.
-static unsigned addLiveIn(MachineFunction &MF, unsigned PReg,
- const TargetRegisterClass *RC) {
- unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
- MF.getRegInfo().addLiveIn(PReg, VReg);
- return VReg;
-}
-
-//===----------------------------------------------------------------------===//
-// Formal Arguments Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-// LowerFormalArguments - transform physical registers into virtual registers
-// and generate load operations for arguments placed on the stack.
-SDValue Nios2TargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
-
- CCInfo.AnalyzeFormalArguments(Ins, CC_Nios2);
-
- // Used with varargs to accumulate store chains.
- std::vector<SDValue> OutChains;
-
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- EVT ValVT = VA.getValVT();
-
- // Arguments stored on registers
- if (VA.isRegLoc()) {
- MVT RegVT = VA.getLocVT();
- unsigned ArgReg = VA.getLocReg();
- const TargetRegisterClass *RC = getRegClassFor(RegVT);
-
- // Transform the arguments stored on
- // physical registers into virtual ones
- unsigned Reg = addLiveIn(MF, ArgReg, RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it has been passed promoted
- // to 32 bits. Insert an assert[sz]ext to capture this, then
- // truncate to the right size.
- if (VA.getLocInfo() != CCValAssign::Full) {
- unsigned Opcode = 0;
- if (VA.getLocInfo() == CCValAssign::SExt)
- Opcode = ISD::AssertSext;
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- Opcode = ISD::AssertZext;
- if (Opcode)
- ArgValue =
- DAG.getNode(Opcode, DL, RegVT, ArgValue, DAG.getValueType(ValVT));
- ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
- }
-
- // Handle floating point arguments passed in integer registers.
- if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
- (RegVT == MVT::i64 && ValVT == MVT::f64))
- ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
- InVals.push_back(ArgValue);
- } else { // VA.isRegLoc()
- MVT LocVT = VA.getLocVT();
-
- // sanity check
- assert(VA.isMemLoc());
-
- // The stack pointer offset is relative to the caller stack frame.
- int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
- VA.getLocMemOffset(), true);
-
- // Create load nodes to retrieve arguments from the stack
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue Load = DAG.getLoad(
- LocVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- InVals.push_back(Load);
- OutChains.push_back(Load.getValue(1));
- }
- }
- if (!OutChains.empty()) {
- OutChains.push_back(Chain);
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
- }
-
- return Chain;
-}
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation
-//===----------------------------------------------------------------------===//
-
-Nios2TargetLowering::Nios2TargetLowering(const TargetMachine &TM,
- const Nios2Subtarget &STI)
- : TargetLowering(TM), Subtarget(&STI) {
-
- addRegisterClass(MVT::i32, &Nios2::CPURegsRegClass);
- computeRegisterProperties(Subtarget->getRegisterInfo());
-}
-
-const char *Nios2TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- case Nios2ISD::Hi:
- return "Nios2ISD::Hi";
- case Nios2ISD::Lo:
- return "Nios2ISD::Lo";
- case Nios2ISD::Ret:
- return "Nios2ISD::Ret";
- }
- return nullptr;
-}
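For reference, a sketch (an assumption, not from the patch) of the node sequence LowerReturn() above builds for a function returning a single i32:

// return <i32 Val>:
//   CopyToReg ..., R2, Val          // RetCC_Nios2EABI assigns the i32 to R2
//   Nios2ISD::Ret chain, Register:R2, glue
// The glue keeps the register copy and the return node adjacent; the RetRA
// pseudo and RET_R1 in Nios2InstrInfo.td later reduce this to a plain "ret".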
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h b/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h
deleted file mode 100644
index c3c8179054bb..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h
+++ /dev/null
@@ -1,63 +0,0 @@
-//===-- Nios2ISelLowering.h - Nios2 DAG Lowering Interface ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that Nios2 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2ISELLOWERING_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2ISELLOWERING_H
-
-#include "Nios2.h"
-#include "llvm/CodeGen/TargetLowering.h"
-
-namespace llvm {
-class Nios2Subtarget;
-
-namespace Nios2ISD {
-enum NodeType {
- // Start the numbering from where ISD NodeType finishes.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // Get the upper 16 bits from a 32-bit immediate
- // (no relation to the Nios2 Hi register)
- Hi,
- // Get the lower 16 bits from a 32-bit immediate
- // (no relation to the Nios2 Lo register)
- Lo,
- // Return
- Ret
-};
-}
-
-class Nios2TargetLowering : public TargetLowering {
- const Nios2Subtarget *Subtarget;
-
-public:
- Nios2TargetLowering(const TargetMachine &TM, const Nios2Subtarget &STI);
-
- /// getTargetNodeName - This method returns the name of a target-specific
- /// DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
- SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
- bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const override;
-
- SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
- SelectionDAG &DAG) const override;
-};
-} // end namespace llvm
-
-#endif // NIOS2_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td b/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td
deleted file mode 100644
index f57bf03bba3c..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td
+++ /dev/null
@@ -1,235 +0,0 @@
-//===-- Nios2InstrFormats.td - Nios2 Instruction Formats ---*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Describe NIOS2 instructions format
-//
-//
-//===----------------------------------------------------------------------===//
-
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-class Format<bits<6> val> {
- bits<6> Value = val;
-}
-
-def Pseudo : Format<0>;
-// Nios2 R1 instr formats:
-def FrmI : Format<1>;
-def FrmR : Format<2>;
-def FrmJ : Format<3>;
-def FrmOther : Format<4>; // Instruction w/ a custom format
-// Nios2 R2 instr 32-bit formats:
-def FrmL26 : Format<5>; // corresponds to J format in R1
-def FrmF2I16 : Format<6>; // corresponds to I format in R1
-def FrmF2X4I12 : Format<7>;
-def FrmF1X4I12 : Format<8>;
-def FrmF1X4L17 : Format<9>;
-def FrmF3X6L5 : Format<10>; // corresponds to R format in R1
-def FrmF2X6L10 : Format<11>;
-def FrmF3X6 : Format<12>; // corresponds to R format in R1
-def FrmF3X8 : Format<13>; // corresponds to custom format in R1
-// Nios2 R2 instr 16-bit formats:
-def FrmI10 : Format<14>;
-def FrmT1I7 : Format<15>;
-def FrmT2I4 : Format<16>;
-def FrmT1X1I6 : Format<17>;
-def FrmX1I7 : Format<18>;
-def FrmL5I4X1 : Format<19>;
-def FrmT2X1L3 : Format<20>;
-def FrmT2X1I3 : Format<21>;
-def FrmT3X1 : Format<22>;
-def FrmT2X3 : Format<23>;
-def FrmF1X1 : Format<24>;
-def FrmX2L5 : Format<25>;
-def FrmF1I5 : Format<26>;
-def FrmF2 : Format<27>;
-
-//===----------------------------------------------------------------------===//
-// Instruction Predicates:
-//===----------------------------------------------------------------------===//
-
-def isNios2r1 : Predicate<"Subtarget->isNios2r1()">;
-def isNios2r2 : Predicate<"Subtarget->isNios2r2()">;
-
-class PredicateControl {
- // Predicates related to specific target CPU features
- list<Predicate> FeaturePredicates = [];
- // Predicates for the instruction group membership in given ISA
- list<Predicate> InstrPredicates = [];
-
- list<Predicate> Predicates = !listconcat(FeaturePredicates, InstrPredicates);
-}
-
-//===----------------------------------------------------------------------===//
-// Base classes for 32-bit, 16-bit and pseudo instructions
-//===----------------------------------------------------------------------===//
-
-class Nios2Inst32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format f>: Instruction,
- PredicateControl {
- field bits<32> Inst;
- Format Form = f;
-
- let Namespace = "Nios2";
- let Size = 4;
-
- bits<6> Opcode = 0;
-
- // Bottom 6 bits are the 'opcode' field
- let Inst{5-0} = Opcode;
-
- let OutOperandList = outs;
- let InOperandList = ins;
-
- let AsmString = asmstr;
- let Pattern = pattern;
- let Itinerary = itin;
-
- // Attributes specific to Nios2 instructions:
-
- // TSFlags layout should be kept in sync with Nios2InstrInfo.h.
- let TSFlags{5-0} = Form.Value;
- let DecoderNamespace = "Nios2";
- field bits<32> SoftFail = 0;
-}
-
-class Nios2Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass Itin = IIPseudo>:
- Nios2Inst32<outs, ins, asmstr, pattern, Itin, Pseudo> {
-
- let isCodeGenOnly = 1;
- let isPseudo = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// Base classes for R1 and R2 instructions
-//===----------------------------------------------------------------------===//
-
-class Nios2R1Inst32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format f>:
- Nios2Inst32<outs, ins, asmstr, pattern, itin, f> {
- let DecoderNamespace = "Nios2";
- let InstrPredicates = [isNios2r1];
-}
-
-class Nios2R2Inst32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format f>:
- Nios2Inst32<outs, ins, asmstr, pattern, itin, f> {
- let DecoderNamespace = "Nios2r2";
- let InstrPredicates = [isNios2r2];
-}
-
-//===----------------------------------------------------------------------===//
-// Format I instruction class in Nios2 : <|A|B|immediate|opcode|>
-//===----------------------------------------------------------------------===//
-
-class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: Nios2R1Inst32<outs, ins, asmstr,
- pattern, itin, FrmI> {
-
- bits<5> rA;
- bits<5> rB;
- bits<16> imm;
-
- let Opcode = op;
-
- let Inst{31-27} = rA;
- let Inst{26-22} = rB;
- let Inst{21-6} = imm;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Format R instruction : <|A|B|C|opx|imm|opcode|>
-//===----------------------------------------------------------------------===//
-
-class FR<bits<6> opx, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: Nios2R1Inst32<outs, ins, asmstr,
- pattern, itin, FrmR> {
- bits<5> rA;
- bits<5> rB;
- bits<5> rC;
- bits<5> imm = 0;
-
- let Opcode = 0x3a; /* opcode is always 0x3a for R instr. */
-
- let Inst{31-27} = rA;
- let Inst{26-22} = rB;
- let Inst{21-17} = rC;
- let Inst{16-11} = opx; /* opx stands for opcode extension */
- let Inst{10-6} = imm; /* optional 5-bit immediate value */
-}
-
-//===----------------------------------------------------------------------===//
-// Format J instruction class in Nios2 : <|address|opcode|>
-//===----------------------------------------------------------------------===//
-
-class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>:
- Nios2R1Inst32<outs, ins, asmstr, pattern, itin, FrmJ> {
- bits<26> addr;
- let Opcode = op;
- let Inst{31-6} = addr;
-}
-
-//===----------------------------------------------------------------------===//
-// Format F3X6 (R2) instruction : <|opx|RSV|C|B|A|opcode|>
-//===----------------------------------------------------------------------===//
-
-class F3X6<bits<6> opx, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>:
- Nios2R2Inst32<outs, ins, asmstr, pattern, itin, FrmF3X6> {
- bits<5> rC;
- bits<5> rB;
- bits<5> rA;
- bits<5> rsv = 0;
-
- let Opcode = 0x20; /* opcode is always 0x20 (OPX group) for F3X6 instr. */
-
- let Inst{31-26} = opx; /* opx stands for opcode extension */
- let Inst{25-21} = rsv;
- let Inst{20-16} = rC;
- let Inst{15-11} = rB;
- let Inst{10-6} = rA;
-}
-
-//===----------------------------------------------------------------------===//
-// Multiclasses for common instructions of both R1 and R2:
-//===----------------------------------------------------------------------===//
-
-// Multiclass for instructions that have R format in R1 and F3X6 format in R2
-// and their opx values differ between R1 and R2
-multiclass CommonInstr_R_F3X6_opx<bits<6> opxR1, bits<6> opxR2, dag outs,
- dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin> {
- def NAME#_R1 : FR<opxR1, outs, ins, asmstr, pattern, itin>;
- def NAME#_R2 : F3X6<opxR2, outs, ins, asmstr, pattern, itin>;
-}
-
-// Multiclass for instructions that have R format in R1 and F3X6 format in R2
-// and their opx values are the same in R1 and R2
-multiclass CommonInstr_R_F3X6<bits<6> opx, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- CommonInstr_R_F3X6_opx<opx, opx, outs, ins, asmstr, pattern, itin>;
-
-// Multiclass for instructions that have I format in R1 and F2I16 format in R2
-// and their op code values differ between R1 and R2
-multiclass CommonInstr_I_F2I16_op<bits<6> opR1, bits<6> opR2, dag outs, dag ins,
- string asmstr, list<dag> pattern,
- InstrItinClass itin> {
- def NAME#_R1 : FI<opR1, outs, ins, asmstr, pattern, itin>;
-}
-
-// Multiclass for instructions that have I format in R1 and F2I16 format in R2
-// and their op code values are the same in R1 and R2
-multiclass CommonInstr_I_F2I16<bits<6> op, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- CommonInstr_I_F2I16_op<op, op, outs, ins, asmstr, pattern, itin>;
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp
deleted file mode 100644
index 9700cba3595b..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- Nios2InstrInfo.cpp - Nios2 Instruction Information ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2InstrInfo.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-
-using namespace llvm;
-
-#define GET_INSTRINFO_CTOR_DTOR
-#include "Nios2GenInstrInfo.inc"
-
-// Pin the vtable to this file.
-void Nios2InstrInfo::anchor() {}
-
-Nios2InstrInfo::Nios2InstrInfo(Nios2Subtarget &ST)
- : Nios2GenInstrInfo(), RI(ST), Subtarget(ST) {}
-
-/// Expand Pseudo instructions into real backend instructions
-bool Nios2InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
- MachineBasicBlock &MBB = *MI.getParent();
-
- switch (MI.getDesc().getOpcode()) {
- default:
- return false;
- case Nios2::RetRA:
- BuildMI(MBB, MI, MI.getDebugLoc(), get(Nios2::RET_R1)).addReg(Nios2::RA);
- break;
- }
-
- MBB.erase(MI);
- return true;
-}
-
-void Nios2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
- unsigned opc = Subtarget.hasNios2r2() ? Nios2::ADD_R2 : Nios2::ADD_R1;
- BuildMI(MBB, I, DL, get(opc))
- .addReg(DestReg, RegState::Define)
- .addReg(Nios2::ZERO)
- .addReg(SrcReg, getKillRegState(KillSrc));
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h
deleted file mode 100644
index 52f6e7e9c7c8..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- Nios2InstrInfo.h - Nios2 Instruction Information --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2INSTRINFO_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2INSTRINFO_H
-
-#include "Nios2RegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "Nios2GenInstrInfo.inc"
-
-namespace llvm {
-
-class Nios2Subtarget;
-
-class Nios2InstrInfo : public Nios2GenInstrInfo {
- const Nios2RegisterInfo RI;
- const Nios2Subtarget &Subtarget;
- virtual void anchor();
-
-public:
- explicit Nios2InstrInfo(Nios2Subtarget &ST);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- const Nios2RegisterInfo &getRegisterInfo() const { return RI; }
-
- bool expandPostRAPseudo(MachineInstr &MI) const override;
-
- void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const override;
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td
deleted file mode 100644
index dee84f74bcbe..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td
+++ /dev/null
@@ -1,109 +0,0 @@
-//===- Nios2InstrInfo.td - Target Description for Nios2 ------*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-
-include "Nios2InstrFormats.td"
-
-
-//===----------------------------------------------------------------------===//
-// Nios2 Operand, Complex Patterns and Transformations Definitions.
-//===----------------------------------------------------------------------===//
-
-def simm16 : Operand<i32> {
- let DecoderMethod= "DecodeSimm16";
-}
-
-// An immediate node that fits as a sign-extended 16-bit value on the target,
-// e.g. addi, andi.
-def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
-
-// Custom return SDNode
-def Nios2Ret : SDNode<"Nios2ISD::Ret", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
-//===----------------------------------------------------------------------===//
-// Instructions specific format
-//===----------------------------------------------------------------------===//
-
-// Arithmetic and logical instructions with 2 registers and 16-bit immediate
-// value.
-multiclass ArithLogicRegImm16<bits<6> op, string mnemonic, SDNode opNode,
- Operand immOp, PatLeaf immType>:
- CommonInstr_I_F2I16<op, (outs CPURegs:$rB),
- (ins CPURegs:$rA, immOp:$imm),
- !strconcat(mnemonic, "\t$rB, $rA, $imm"),
- [(set CPURegs:$rB,
- (opNode CPURegs:$rA, immType:$imm))],
- IIAlu>;
-
-// Arithmetic and logical instructions with 3 register operands.
-// Defines R1 and R2 instruction at the same time.
-multiclass ArithLogicReg<bits<6> opx, string mnemonic,
- SDNode opNode>:
- CommonInstr_R_F3X6<opx, (outs CPURegs:$rC),
- (ins CPURegs:$rA, CPURegs:$rB),
- !strconcat(mnemonic, "\t$rC, $rA, $rB"),
- [(set CPURegs:$rC, (opNode CPURegs:$rA, CPURegs:$rB))],
- IIAlu>;
-
-multiclass Return<bits<6> opx, dag outs, dag ins, string mnemonic> {
- let rB = 0, rC = 0,
- isReturn = 1,
- isCodeGenOnly = 1,
- hasCtrlDep = 1,
- hasExtraSrcRegAllocReq = 1 in {
- defm NAME# : CommonInstr_R_F3X6<opx, outs, ins, mnemonic, [], IIBranch>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Nios2 Instructions
-//===----------------------------------------------------------------------===//
-
-/// Arithmetic instructions operating on registers.
-let isCommutable = 1 ,
- isReMaterializable = 1 in {
- defm ADD : ArithLogicReg<0x31, "add", add>;
- defm AND : ArithLogicReg<0x0e, "and", and>;
- defm OR : ArithLogicReg<0x16, "or", or>;
- defm XOR : ArithLogicReg<0x1e, "xor", xor>;
- defm MUL : ArithLogicReg<0x27, "mul", mul>;
-}
-
-let isReMaterializable = 1 in {
- defm SUB : ArithLogicReg<0x39, "sub", sub>;
-}
-
-defm DIVU : ArithLogicReg<0x24, "divu", udiv>;
-defm DIV : ArithLogicReg<0x25, "div", sdiv>;
-
-defm SLL : ArithLogicReg<0x13, "sll", shl>;
-defm SRL : ArithLogicReg<0x1b, "srl", srl>;
-defm SRA : ArithLogicReg<0x3b, "sra", sra>;
-
-/// Arithmetic Instructions (ALU Immediate)
-defm ADDI : ArithLogicRegImm16<0x04, "addi", add, simm16, immSExt16>;
-
-// Returns:
-defm RET : Return<0x05, (outs), (ins CPURegs:$rA), "ret">;
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions
-//===----------------------------------------------------------------------===//
-
-// Return RA.
-let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
-def RetRA : Nios2Pseudo<(outs), (ins), "", [(Nios2Ret)]>;
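For reference, a worked encoding example that follows the FrmI field layout from Nios2InstrFormats.td and the ADDI opcode above (purely illustrative, not part of the patch):

#include <cstdint>

// addi $rB, $rA, $imm is an I-format instruction:
//   Inst{31-27} = rA, Inst{26-22} = rB, Inst{21-6} = imm16, Inst{5-0} = 0x04
constexpr uint32_t encodeAddi(uint32_t rA, uint32_t rB, uint32_t imm16) {
  return (rA << 27) | (rB << 22) | ((imm16 & 0xffff) << 6) | 0x04;
}

// "addi r2, r3, 100": rA = 3 (source), rB = 2 (destination), imm = 100.
static_assert(encodeAddi(3, 2, 100) == 0x18801904,
              "encoding follows the FrmI bit layout");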
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp b/contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp
deleted file mode 100644
index c43af879b8a6..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-//===-- Nios2MCInstLower.cpp - Convert Nios2 MachineInstr to MCInst -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to lower Nios2 MachineInstrs to their corresponding
-// MCInst records.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/Nios2BaseInfo.h"
-#include "MCTargetDesc/Nios2MCExpr.h"
-#include "Nios2.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
-
-using namespace llvm;
-
-static MCOperand LowerSymbolOperand(const MachineOperand &MO, AsmPrinter &AP) {
- MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
- Nios2MCExpr::Nios2ExprKind TargetKind = Nios2MCExpr::CEK_None;
- const MCSymbol *Symbol;
-
- switch (MO.getTargetFlags()) {
- default:
- llvm_unreachable("Invalid target flag!");
- case Nios2FG::MO_NO_FLAG:
- break;
- case Nios2FG::MO_ABS_HI:
- TargetKind = Nios2MCExpr::CEK_ABS_HI;
- break;
- case Nios2FG::MO_ABS_LO:
- TargetKind = Nios2MCExpr::CEK_ABS_LO;
- break;
- }
-
- switch (MO.getType()) {
- case MachineOperand::MO_GlobalAddress:
- Symbol = AP.getSymbol(MO.getGlobal());
- break;
-
- case MachineOperand::MO_MachineBasicBlock:
- Symbol = MO.getMBB()->getSymbol();
- break;
-
- case MachineOperand::MO_BlockAddress:
- Symbol = AP.GetBlockAddressSymbol(MO.getBlockAddress());
- break;
-
- case MachineOperand::MO_ExternalSymbol:
- Symbol = AP.GetExternalSymbolSymbol(MO.getSymbolName());
- break;
-
- case MachineOperand::MO_JumpTableIndex:
- Symbol = AP.GetJTISymbol(MO.getIndex());
- break;
-
- case MachineOperand::MO_ConstantPoolIndex:
- Symbol = AP.GetCPISymbol(MO.getIndex());
- break;
-
- default:
- llvm_unreachable("<unknown operand type>");
- }
-
- const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, AP.OutContext);
-
- if (TargetKind != Nios2MCExpr::CEK_None)
- Expr = Nios2MCExpr::create(TargetKind, Expr, AP.OutContext);
-
- return MCOperand::createExpr(Expr);
-}
-
-static MCOperand LowerOperand(const MachineOperand &MO, AsmPrinter &AP) {
-
- switch (MO.getType()) {
- default:
- llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit())
- break;
- return MCOperand::createReg(MO.getReg());
- case MachineOperand::MO_Immediate:
- return MCOperand::createImm(MO.getImm());
- case MachineOperand::MO_MachineBasicBlock:
- case MachineOperand::MO_ExternalSymbol:
- case MachineOperand::MO_JumpTableIndex:
- case MachineOperand::MO_BlockAddress:
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ConstantPoolIndex:
- return LowerSymbolOperand(MO, AP);
- case MachineOperand::MO_RegisterMask:
- break;
- }
-
- return MCOperand();
-}
-
-void llvm::LowerNios2MachineInstToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP) {
-
- OutMI.setOpcode(MI->getOpcode());
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- MCOperand MCOp = LowerOperand(MO, AP);
-
- if (MCOp.isValid())
- OutMI.addOperand(MCOp);
- }
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp b/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp
deleted file mode 100644
index be5b8829fe36..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===-- Nios2MachineFunctionInfo.cpp - Private data used for Nios2 --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2MachineFunction.h"
-
-using namespace llvm;
-
-void Nios2FunctionInfo::anchor() {}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h b/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h
deleted file mode 100644
index 73baf9694790..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//===-- Nios2MachineFunctionInfo.h - Private data used for Nios2 --*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Nios2 specific subclass of MachineFunctionInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2MACHINEFUNCTION_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2MACHINEFUNCTION_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// Nios2FunctionInfo - This class is derived from MachineFunctionInfo and
-/// contains private Nios2 target-specific information for each MachineFunction.
-class Nios2FunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
-
-private:
- unsigned GlobalBaseReg;
-
- /// VarArgsFrameOffset - Frame offset to start of varargs area.
- int VarArgsFrameOffset;
-
- /// SRetReturnReg - Holds the virtual register into which the sret
- /// argument is passed.
- unsigned SRetReturnReg;
-
- /// IsLeafProc - True if the function is a leaf procedure.
- bool IsLeafProc;
-
-public:
- Nios2FunctionInfo()
- : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
- IsLeafProc(false) {}
- explicit Nios2FunctionInfo(MachineFunction &MF)
- : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
- IsLeafProc(false) {}
-
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
-
- int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
- void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
-
- unsigned getSRetReturnReg() const { return SRetReturnReg; }
- void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
-
- void setLeafProc(bool rhs) { IsLeafProc = rhs; }
- bool isLeafProc() const { return IsLeafProc; }
-};
-
-} // end of namespace llvm
-
-#endif // NIOS2_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp
deleted file mode 100644
index 9b892f917535..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- Nios2RegisterInfo.cpp - Nios2 Register Information ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "nios2-reg-info"
-
-#include "Nios2RegisterInfo.h"
-
-#include "Nios2.h"
-#include "Nios2Subtarget.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "Nios2GenRegisterInfo.inc"
-
-using namespace llvm;
-
-Nios2RegisterInfo::Nios2RegisterInfo(const Nios2Subtarget &ST)
- : Nios2GenRegisterInfo(Nios2::RA), Subtarget(ST) {}
-
-const TargetRegisterClass *Nios2RegisterInfo::intRegClass(unsigned Size) const {
- return &Nios2::CPURegsRegClass;
-}
-
-const MCPhysReg *
-Nios2RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- return CSR_SaveList;
-}
-
-BitVector Nios2RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- static const MCPhysReg ReservedCPURegs[] = {Nios2::ZERO, Nios2::AT, Nios2::SP,
- Nios2::RA, Nios2::PC, Nios2::GP};
- BitVector Reserved(getNumRegs());
-
- for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I)
- Reserved.set(ReservedCPURegs[I]);
-
- return Reserved;
-}
-
-void Nios2RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {}
-
-unsigned Nios2RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return Nios2::SP;
-}
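
The deleted getReservedRegs above follows the usual TargetRegisterInfo pattern: a fixed list of physical registers is marked in a bit vector sized to the total register count. The sketch below is a reduced, standalone illustration of that pattern only; the enum values and std::bitset are stand-ins for the generated Nios2 register enum and llvm::BitVector, not the real types.

#include <bitset>
#include <cstdio>

// Stand-in register numbers; the real values come from Nios2GenRegisterInfo.inc.
enum Nios2Reg { ZERO = 0, AT = 1, GP = 26, SP = 27, RA = 31, PC = 32, NUM_REGS = 33 };

static std::bitset<NUM_REGS> getReservedRegs() {
  static const unsigned ReservedCPURegs[] = {ZERO, AT, SP, RA, PC, GP};
  std::bitset<NUM_REGS> Reserved;             // all bits clear by default
  for (unsigned R : ReservedCPURegs)
    Reserved.set(R);                          // mark each reserved register
  return Reserved;
}

int main() {
  std::printf("reserved registers: %zu of %d\n",
              getReservedRegs().count(), int(NUM_REGS));
  return 0;
}
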
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h
deleted file mode 100644
index 3658343b1d2e..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- Nios2RegisterInfo.h - Nios2 Register Information Impl ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2REGISTERINFO_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2REGISTERINFO_H
-
-#include "Nios2.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#include "Nios2GenRegisterInfo.inc"
-
-namespace llvm {
-class Nios2Subtarget;
-class TargetInstrInfo;
-class Type;
-
-class Nios2RegisterInfo : public Nios2GenRegisterInfo {
-protected:
- const Nios2Subtarget &Subtarget;
-
-public:
- Nios2RegisterInfo(const Nios2Subtarget &Subtarget);
-
- const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
-
- BitVector getReservedRegs(const MachineFunction &MF) const override;
-
- /// Stack Frame Processing Methods
- void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *RS = nullptr) const override;
-
- /// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
-
- /// Return GPR register class.
- const TargetRegisterClass *intRegClass(unsigned Size) const;
-};
-
-} // end namespace llvm
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td
deleted file mode 100644
index 1808815816f3..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Nios2RegisterInfo.td - Nios2 Register defs ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-// We have a bank of 32 registers.
-class Nios2Reg<string n> : Register<n> {
- field bits<5> Num;
- let Namespace = "Nios2";
-}
-
-// Nios2 CPU Registers
-class Nios2GPRReg<bits<5> num, string n> : Nios2Reg<n> {
- let Num = num;
-}
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-
-let Namespace = "Nios2" in {
- // General Purpose Registers
- def ZERO : Nios2GPRReg<0, "zero">, DwarfRegNum<[ 0 ]>;
- def AT : Nios2GPRReg<1, "at">, DwarfRegNum<[ 1 ]>;
- foreach RegNum = 2 - 23 in {
- def R #RegNum : Nios2GPRReg<RegNum, "r" #RegNum>, DwarfRegNum<[ RegNum ]>;
- }
- def ET : Nios2GPRReg<24, "et">, DwarfRegNum<[ 24 ]>;
- def BT : Nios2GPRReg<25, "bt">, DwarfRegNum<[ 25 ]>;
- def GP : Nios2GPRReg<26, "gp">, DwarfRegNum<[ 26 ]>;
- def SP : Nios2GPRReg<27, "sp">, DwarfRegNum<[ 27 ]>;
- def FP : Nios2GPRReg<28, "fp">, DwarfRegNum<[ 28 ]>;
- def EA : Nios2GPRReg<29, "ea">, DwarfRegNum<[ 29 ]>;
- def BA : Nios2GPRReg<30, "ba">, DwarfRegNum<[ 30 ]>;
- def RA : Nios2GPRReg<31, "ra">, DwarfRegNum<[ 31 ]>;
- def PC : Nios2Reg<"pc">, DwarfRegNum<[ 32 ]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Register Classes
-//===----------------------------------------------------------------------===//
-
-def CPURegs : RegisterClass<"Nios2", [ i32 ], 32,
- (add
- // Reserved
- ZERO,
- AT,
- // Return Values and Arguments
- (sequence "R%u", 2, 7),
- // Not preserved across procedure calls
- // Caller saved
- (sequence "R%u", 8, 15),
- // Callee saved
- (sequence "R%u", 16, 23),
- // Reserved
- ET, BT, GP, SP, FP, EA, BA, RA, PC)>;
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2Schedule.td b/contrib/llvm/lib/Target/Nios2/Nios2Schedule.td
deleted file mode 100644
index 2d1d9d5e5f3f..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2Schedule.td
+++ /dev/null
@@ -1,39 +0,0 @@
-//===-- Nios2Schedule.td - Nios2 Scheduling Definitions ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Functional units across Nios2 chip sets. Based on GCC/Nios2 backend files.
-//===----------------------------------------------------------------------===//
-def ALU : FuncUnit;
-def IMULDIV : FuncUnit;
-
-//===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for Nios2
-//===----------------------------------------------------------------------===//
-def IIAlu : InstrItinClass;
-def IILoad : InstrItinClass;
-def IIStore : InstrItinClass;
-def IIFlush : InstrItinClass;
-def IIIdiv : InstrItinClass;
-def IIBranch : InstrItinClass;
-
-def IIPseudo : InstrItinClass;
-
-//===----------------------------------------------------------------------===//
-// Nios2 Generic instruction itineraries.
-//===----------------------------------------------------------------------===//
-//@ http://llvm.org/docs/doxygen/html/structllvm_1_1InstrStage.html
-def Nios2GenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
- InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
- InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
- InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIFlush , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
- InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>
-]>;
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp b/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp
deleted file mode 100644
index 196bed20cdcc..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- Nios2Subtarget.cpp - Nios2 Subtarget Information ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Nios2 specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2Subtarget.h"
-#include "Nios2.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2-subtarget"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "Nios2GenSubtargetInfo.inc"
-
-void Nios2Subtarget::anchor() {}
-
-Nios2Subtarget::Nios2Subtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM)
- :
-
-      // Nios2GenSubtargetInfo will display the available features when run
-      // with llc -march=nios2 -mcpu=help
- Nios2GenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(*this) {}
-
-Nios2Subtarget &Nios2Subtarget::initializeSubtargetDependencies(StringRef CPU,
- StringRef FS) {
- if (TargetTriple.getArch() == Triple::nios2) {
- if (CPU != "nios2r2") {
- CPU = "nios2r1";
- Nios2ArchVersion = Nios2r1;
- } else {
- Nios2ArchVersion = Nios2r2;
- }
- } else {
- errs() << "!!!Error, TargetTriple.getArch() = " << TargetTriple.getArch()
- << "CPU = " << CPU << "\n";
- exit(0);
- }
-
- // Parse features string.
- ParseSubtargetFeatures(CPU, FS);
-
- return *this;
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h b/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h
deleted file mode 100644
index a822dff33b5b..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- Nios2Subtarget.h - Define Subtarget for the Nios2 -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Nios2 specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2SUBTARGET_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2SUBTARGET_H
-
-#include "Nios2FrameLowering.h"
-#include "Nios2ISelLowering.h"
-#include "Nios2InstrInfo.h"
-#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-
-#define GET_SUBTARGETINFO_HEADER
-#include "Nios2GenSubtargetInfo.inc"
-
-namespace llvm {
-class StringRef;
-
-class Nios2TargetMachine;
-
-class Nios2Subtarget : public Nios2GenSubtargetInfo {
- virtual void anchor();
-
-public:
- // Nios2 R2 features
- // Bit manipulation instructions extension
- bool HasBMX;
- // Code Density instructions extension
- bool HasCDX;
- // Multi-Processor instructions extension
- bool HasMPX;
- // New mandatory instructions
- bool HasR2Mandatory;
-
-protected:
- enum Nios2ArchEnum {
- // Nios2 R1 ISA
- Nios2r1,
- // Nios2 R2 ISA
- Nios2r2
- };
-
- // Nios2 architecture version
- Nios2ArchEnum Nios2ArchVersion;
-
- Triple TargetTriple;
-
- Nios2InstrInfo InstrInfo;
- Nios2TargetLowering TLInfo;
- SelectionDAGTargetInfo TSInfo;
- Nios2FrameLowering FrameLowering;
-
-public:
- /// This constructor initializes the data members to match that
- /// of the specified triple.
- Nios2Subtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- bool hasNios2r1() const { return Nios2ArchVersion >= Nios2r1; }
- bool isNios2r1() const { return Nios2ArchVersion == Nios2r1; }
- bool hasNios2r2() const { return Nios2ArchVersion >= Nios2r2; }
- bool isNios2r2() const { return Nios2ArchVersion == Nios2r2; }
-
- Nios2Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
-
- const Nios2InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- const Nios2RegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
- }
- const Nios2TargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
- const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp b/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp
deleted file mode 100644
index b7594dde709d..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-//===-- Nios2TargetMachine.cpp - Define TargetMachine for Nios2 -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implements the info about Nios2 target spec.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2TargetMachine.h"
-#include "Nios2.h"
-#include "Nios2TargetObjectFile.h"
-
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2"
-
-extern "C" void LLVMInitializeNios2Target() {
- // Register the target.
- RegisterTargetMachine<Nios2TargetMachine> X(getTheNios2Target());
-}
-
-static std::string computeDataLayout() {
- return "e-p:32:32:32-i8:8:32-i16:16:32-n32";
-}
-
-static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
-}
-
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- Reloc::Model RM, bool JIT) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
-Nios2TargetMachine::Nios2TargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- Optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
- : LLVMTargetMachine(
- T, computeDataLayout(), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL),
- TLOF(make_unique<Nios2TargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this) {
- initAsmInfo();
-}
-
-Nios2TargetMachine::~Nios2TargetMachine() {}
-
-const Nios2Subtarget *
-Nios2TargetMachine::getSubtargetImpl(const Function &F) const {
- Attribute CPUAttr = F.getFnAttribute("target-cpu");
- Attribute FSAttr = F.getFnAttribute("target-features");
-
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
-
- auto &I = SubtargetMap[CPU + FS];
- if (!I) {
- // This needs to be done before we create a new subtarget since any
- // creation will depend on the TM and the code generation flags on the
- // function that reside in TargetOptions.
- resetTargetOptions(F);
- I = llvm::make_unique<Nios2Subtarget>(TargetTriple, CPU, FS, *this);
- }
- return I.get();
-}
-
-namespace {
-/// Nios2 Code Generator Pass Configuration Options.
-class Nios2PassConfig : public TargetPassConfig {
-public:
- Nios2PassConfig(Nios2TargetMachine &TM, PassManagerBase *PM)
- : TargetPassConfig(TM, *PM) {}
-
- Nios2TargetMachine &getNios2TargetMachine() const {
- return getTM<Nios2TargetMachine>();
- }
-
- void addCodeGenPrepare() override;
- bool addInstSelector() override;
- void addIRPasses() override;
-};
-} // namespace
-
-TargetPassConfig *Nios2TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new Nios2PassConfig(*this, &PM);
-}
-
-void Nios2PassConfig::addCodeGenPrepare() {
- TargetPassConfig::addCodeGenPrepare();
-}
-
-void Nios2PassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); }
-
-// Install an instruction selector pass using
-// the ISelDag to gen Nios2 code.
-bool Nios2PassConfig::addInstSelector() {
- addPass(createNios2ISelDag(getNios2TargetMachine(), getOptLevel()));
- return false;
-}
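
The deleted Nios2TargetMachine::getSubtargetImpl above uses the per-function subtarget caching that most LLVM targets share: subtargets are memoized in a map keyed by the concatenated "target-cpu" and "target-features" strings, so functions with identical attributes share one Nios2Subtarget. The following is a reduced sketch of just that caching step, with plain standard-library stand-ins rather than the LLVM types.

#include <cstdio>
#include <map>
#include <memory>
#include <string>

struct Subtarget { std::string CPU, FS; };            // stand-in for Nios2Subtarget

static std::map<std::string, std::unique_ptr<Subtarget>> SubtargetMap;

static Subtarget *getSubtargetImpl(const std::string &CPU, const std::string &FS) {
  auto &Entry = SubtargetMap[CPU + FS];               // one slot per CPU+FS pair
  if (!Entry)                                         // create lazily on first use
    Entry = std::make_unique<Subtarget>(Subtarget{CPU, FS});
  return Entry.get();
}

int main() {
  Subtarget *A = getSubtargetImpl("nios2r1", "");
  Subtarget *B = getSubtargetImpl("nios2r1", "");
  std::printf("cached: %s\n", A == B ? "yes" : "no"); // prints "yes"
  return 0;
}
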
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h b/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h
deleted file mode 100644
index 1ebfb397383e..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===-- Nios2TargetMachine.h - Define TargetMachine for Nios2 ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Nios2 specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H
-
-#include "Nios2Subtarget.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-class Nios2TargetMachine : public LLVMTargetMachine {
- mutable StringMap<std::unique_ptr<Nios2Subtarget>> SubtargetMap;
- std::unique_ptr<TargetLoweringObjectFile> TLOF;
- Nios2Subtarget Subtarget;
-
-public:
- Nios2TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT);
- ~Nios2TargetMachine() override;
-
- const Nios2Subtarget *getSubtargetImpl() const { return &Subtarget; }
- const Nios2Subtarget *getSubtargetImpl(const Function &F) const override;
-
- TargetLoweringObjectFile *getObjFileLowering() const override {
- return TLOF.get();
- }
-
- // Pass Pipeline Configuration
- TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp b/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp
deleted file mode 100644
index 5fc85ef487e6..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===-- Nios2TargetObjectFile.cpp - Nios2 Object Files --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2TargetObjectFile.h"
-
-using namespace llvm;
-
-void Nios2TargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h b/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h
deleted file mode 100644
index e9ed6e31d937..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h
+++ /dev/null
@@ -1,26 +0,0 @@
-//===-- llvm/Target/Nios2TargetObjectFile.h - Nios2 Object Info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETOBJECTFILE_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETOBJECTFILE_H
-
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-
-namespace llvm {
-
-class Nios2TargetObjectFile : public TargetLoweringObjectFileELF {
-public:
- Nios2TargetObjectFile() : TargetLoweringObjectFileELF() {}
-
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
-};
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h b/contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h
deleted file mode 100644
index 1520ac27e94f..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- Nios2TargetStreamer.h - Nios2 Target Streamer ----------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETSTREAMER_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETSTREAMER_H
-
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-
-namespace llvm {
-
-class Nios2TargetStreamer : public MCTargetStreamer {
-public:
- Nios2TargetStreamer(MCStreamer &S);
-};
-
-// This part is for ascii assembly output
-class Nios2TargetAsmStreamer : public Nios2TargetStreamer {
-public:
- Nios2TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
-};
-
-} // namespace llvm
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp b/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp
deleted file mode 100644
index d808a96db772..000000000000
--- a/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- Nios2TargetInfo.cpp - Nios2 Target Implementation -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-Target &llvm::getTheNios2Target() {
- static Target TheNios2Target;
- return TheNios2Target;
-}
-
-extern "C" void LLVMInitializeNios2TargetInfo() {
- RegisterTarget<Triple::nios2,
- /*HasJIT=*/true>
- X(getTheNios2Target(), "nios2", "Nios2", "Nios2");
-}
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 56307a84f2e5..8b3480f772e9 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -21,7 +21,6 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
@@ -31,169 +30,7 @@
using namespace llvm;
-static const MCPhysReg RRegs[32] = {
- PPC::R0, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-static const MCPhysReg RRegsNoR0[32] = {
- PPC::ZERO,
- PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-static const MCPhysReg XRegs[32] = {
- PPC::X0, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-static const MCPhysReg XRegsNoX0[32] = {
- PPC::ZERO8,
- PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-static const MCPhysReg FRegs[32] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31
-};
-static const MCPhysReg SPERegs[32] = {
- PPC::S0, PPC::S1, PPC::S2, PPC::S3,
- PPC::S4, PPC::S5, PPC::S6, PPC::S7,
- PPC::S8, PPC::S9, PPC::S10, PPC::S11,
- PPC::S12, PPC::S13, PPC::S14, PPC::S15,
- PPC::S16, PPC::S17, PPC::S18, PPC::S19,
- PPC::S20, PPC::S21, PPC::S22, PPC::S23,
- PPC::S24, PPC::S25, PPC::S26, PPC::S27,
- PPC::S28, PPC::S29, PPC::S30, PPC::S31
-};
-static const MCPhysReg VFRegs[32] = {
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static const MCPhysReg VRegs[32] = {
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-static const MCPhysReg VSRegs[64] = {
- PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
- PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
- PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
- PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
- PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
- PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
- PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
- PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
-
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-static const MCPhysReg VSFRegs[64] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static const MCPhysReg VSSRegs[64] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static unsigned QFRegs[32] = {
- PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
- PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
- PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
- PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
- PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
- PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
- PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
-};
-static const MCPhysReg CRBITRegs[32] = {
- PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
- PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
- PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
- PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
- PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
-};
-static const MCPhysReg CRRegs[8] = {
- PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
- PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
-};
+DEFINE_PPC_REGCLASSES;
// Evaluate an expression containing condition register
// or condition register field symbols. Returns positive
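
All of the hand-written register tables deleted from the asm parser above are regenerated by the single DEFINE_PPC_REGCLASSES macro, whose building blocks (PPC_REGS0_31 and friends) appear in the PPCMCTargetDesc.h hunk at the end of this diff. The mechanism is plain preprocessor token pasting: X##0 .. X##31 turns a register-class prefix into the 32 enum names. The block below is a scaled-down, self-contained illustration of that expansion; the 4-entry macro and the fake enum values are for demonstration only and are not the real PPC definitions.

#include <cstdint>
#include <cstdio>

using MCPhysReg = uint16_t;
namespace PPC { enum { R0 = 100, R1, R2, R3 }; }      // fake enum values

// Scaled-down analogue of PPC_REGS0_31: paste the prefix onto 0..3.
#define PPC_REGS0_3(X) { X##0, X##1, X##2, X##3 }

static const MCPhysReg RRegs[4] = PPC_REGS0_3(PPC::R);

int main() {
  for (unsigned i = 0; i != 4; ++i)
    std::printf("r%u -> %u\n", i, unsigned(RRegs[i])); // 100, 101, 102, 103
  return 0;
}
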
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index db01271b87e1..26869f250823 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -17,6 +17,8 @@
using namespace llvm;
+DEFINE_PPC_REGCLASSES;
+
#define DEBUG_TYPE "ppc-disassembler"
typedef MCDisassembler::DecodeStatus DecodeStatus;
@@ -62,184 +64,9 @@ extern "C" void LLVMInitializePowerPCDisassembler() {
// FIXME: These can be generated by TableGen from the existing register
// encoding values!
-static const unsigned CRRegs[] = {
- PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
- PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
-};
-
-static const unsigned CRBITRegs[] = {
- PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
- PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
- PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
- PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
- PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
-};
-
-static const unsigned FRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31
-};
-
-static const unsigned VFRegs[] = {
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned VRegs[] = {
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
-static const unsigned VSRegs[] = {
- PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
- PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
- PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
- PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
- PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
- PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
- PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
- PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
-
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
-static const unsigned VSFRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned VSSRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned GPRegs[] = {
- PPC::R0, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-
-static const unsigned GP0Regs[] = {
- PPC::ZERO, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-
-static const unsigned G8Regs[] = {
- PPC::X0, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-
-static const unsigned G80Regs[] = {
- PPC::ZERO8, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-
-static const unsigned QFRegs[] = {
- PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
- PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
- PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
- PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
- PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
- PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
- PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
-};
-
-static const unsigned SPERegs[] = {
- PPC::S0, PPC::S1, PPC::S2, PPC::S3,
- PPC::S4, PPC::S5, PPC::S6, PPC::S7,
- PPC::S8, PPC::S9, PPC::S10, PPC::S11,
- PPC::S12, PPC::S13, PPC::S14, PPC::S15,
- PPC::S16, PPC::S17, PPC::S18, PPC::S19,
- PPC::S20, PPC::S21, PPC::S22, PPC::S23,
- PPC::S24, PPC::S25, PPC::S26, PPC::S27,
- PPC::S28, PPC::S29, PPC::S30, PPC::S31
-};
-
template <std::size_t N>
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
- const unsigned (&Regs)[N]) {
+ const MCPhysReg (&Regs)[N]) {
assert(RegNo < N && "Invalid register number");
Inst.addOperand(MCOperand::createReg(Regs[RegNo]));
return MCDisassembler::Success;
@@ -308,25 +135,25 @@ static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GPRegs);
+ return decodeRegisterClass(Inst, RegNo, RRegs);
}
static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GP0Regs);
+ return decodeRegisterClass(Inst, RegNo, RRegsNoR0);
}
static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, G8Regs);
+ return decodeRegisterClass(Inst, RegNo, XRegs);
}
static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, G80Regs);
+ return decodeRegisterClass(Inst, RegNo, XRegsNoX0);
}
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
@@ -341,7 +168,7 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GPRegs);
+ return decodeRegisterClass(Inst, RegNo, RRegs);
}
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
@@ -388,19 +215,19 @@ static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
case PPC::LFSU:
case PPC::LFDU:
// Add the tied output operand.
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
break;
case PPC::STBU:
case PPC::STHU:
case PPC::STWU:
case PPC::STFSU:
case PPC::STFDU:
- Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base]));
break;
}
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -416,12 +243,12 @@ static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
if (Inst.getOpcode() == PPC::LDU)
// Add the tied output operand.
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
else if (Inst.getOpcode() == PPC::STDU)
- Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base]));
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 2)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -436,7 +263,7 @@ static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 4)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -451,7 +278,7 @@ static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 3));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -466,7 +293,7 @@ static DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 2));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -481,7 +308,7 @@ static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 1));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
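
The disassembler change above also tightens decodeRegisterClass to take const MCPhysReg (&Regs)[N]: passing the table by array reference lets the template deduce N, so the bounds assert automatically tracks whichever shared table is passed in. A reduced standalone sketch of that idiom follows (stand-in table and return value, no MCInst involved).

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

using MCPhysReg = uint16_t;
static const MCPhysReg RRegs[4] = {100, 101, 102, 103};   // stand-in encodings

template <std::size_t N>
static MCPhysReg decodeRegisterClass(uint64_t RegNo, const MCPhysReg (&Regs)[N]) {
  assert(RegNo < N && "Invalid register number");   // N deduced from the table
  return Regs[RegNo];   // the real function wraps this in MCOperand::createReg()
}

int main() {
  std::printf("decoded: %u\n", unsigned(decodeRegisterClass(2, RRegs))); // 102
  return 0;
}
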
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index fd7f81591426..fc29e4effbb1 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -499,43 +499,14 @@ bool PPCInstPrinter::showRegistersWithPrefix() const {
return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames;
}
-/// stripRegisterPrefix - This method strips the character prefix from a
-/// register name so that only the number is left.
-static const char *stripRegisterPrefix(const char *RegName) {
- switch (RegName[0]) {
- case 'r':
- case 'f':
- case 'q': // for QPX
- case 'v':
- if (RegName[1] == 's')
- return RegName + 2;
- return RegName + 1;
- case 'c': if (RegName[1] == 'r') return RegName + 2;
- }
-
- return RegName;
-}
-
void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
-
- // There are VSX instructions that use VSX register numbering (vs0 - vs63)
- // as well as those that use VMX register numbering (v0 - v31 which
- // correspond to vs32 - vs63). If we have an instruction that uses VSX
- // numbering, we need to convert the VMX registers to VSX registers.
- // Namely, we print 32-63 when the instruction operates on one of the
- // VMX registers.
- // (Please synchronize with PPCAsmPrinter::printOperand)
- if ((MII.get(MI->getOpcode()).TSFlags & PPCII::UseVSXReg) &&
- !ShowVSRNumsAsVR) {
- if (PPCInstrInfo::isVRRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (PPCInstrInfo::isVFRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::VF0);
- }
+ if (!ShowVSRNumsAsVR)
+ Reg = PPCInstrInfo::getRegNumForOperand(MII.get(MI->getOpcode()),
+ Reg, OpNo);
const char *RegName;
RegName = getVerboseConditionRegName(Reg, MRI.getEncodingValue(Reg));
@@ -544,7 +515,7 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (showRegistersWithPercentPrefix(RegName))
O << "%";
if (!showRegistersWithPrefix())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName;
return;
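
The static stripRegisterPrefix removed above now lives in PPCRegisterInfo::stripRegisterPrefix (its new home is not shown in this hunk). For reference, here is a standalone copy of the stripping rules the printer depends on, with a small driver: "r3" becomes "3", "vs5" becomes "5", "cr2" becomes "2", and anything unrecognized is returned unchanged.

#include <cstdio>

// Same rules as the removed helper: drop a leading r/f/q/v prefix (two
// characters when followed by 's', e.g. "vs") or a leading "cr", leaving
// only the register number.
static const char *stripRegisterPrefix(const char *RegName) {
  switch (RegName[0]) {
  case 'r':
  case 'f':
  case 'q': // for QPX
  case 'v':
    if (RegName[1] == 's')
      return RegName + 2;
    return RegName + 1;
  case 'c':
    if (RegName[1] == 'r')
      return RegName + 2;
  }
  return RegName;
}

int main() {
  std::printf("%s %s %s %s\n", stripRegisterPrefix("r3"),
              stripRegisterPrefix("vs5"), stripRegisterPrefix("cr2"),
              stripRegisterPrefix("lr"));              // prints: 3 5 2 lr
  return 0;
}
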
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 57bda1403c62..8c15ade6f9c4 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -13,18 +13,13 @@
#include "MCTargetDesc/PPCFixupKinds.h"
#include "PPCInstrInfo.h"
+#include "PPCMCCodeEmitter.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
@@ -39,117 +34,6 @@ using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
-namespace {
-
-class PPCMCCodeEmitter : public MCCodeEmitter {
- const MCInstrInfo &MCII;
- const MCContext &CTX;
- bool IsLittleEndian;
-
-public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : MCII(mcii), CTX(ctx),
- IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
- PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
- void operator=(const PPCMCCodeEmitter &) = delete;
- ~PPCMCCodeEmitter() override = default;
-
- unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- // getBinaryCodeForInstr - TableGen'erated function for getting the
- // binary encoding for an instruction.
- uint64_t getBinaryCodeForInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
- unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MCII.get(Opcode);
-
- uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
-
- // Output the constant in big/little endian byte order.
- unsigned Size = Desc.getSize();
- support::endianness E = IsLittleEndian ? support::little : support::big;
- switch (Size) {
- case 0:
- break;
- case 4:
- support::endian::write<uint32_t>(OS, Bits, E);
- break;
- case 8:
- // If we emit a pair of instructions, the first one is
- // always in the top 32 bits, even on little-endian.
- support::endian::write<uint32_t>(OS, Bits >> 32, E);
- support::endian::write<uint32_t>(OS, Bits, E);
- break;
- default:
- llvm_unreachable("Invalid instruction size");
- }
-
- ++MCNumEmitted; // Keep track of the # of mi's emitted.
- }
-
-private:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
-};
-
-} // end anonymous namespace
-
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
@@ -264,10 +148,16 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12;
const MCOperand &MO = MI.getOperand(OpNo);
- assert(MO.isImm() && !(MO.getImm() % 16) &&
- "Expecting an immediate that is a multiple of 16");
+ if (MO.isImm()) {
+ assert(!(MO.getImm() % 16) &&
+ "Expecting an immediate that is a multiple of 16");
+ return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
+ }
- return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
+ // Otherwise add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16ds));
+ return RegBits;
}
unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
@@ -354,6 +244,20 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
}
+// Get the index for this operand in this instruction. This is needed for
+// computing the register number in PPCInstrInfo::getRegNumForOperand() for
+// any instructions that use a different numbering scheme for registers in
+// different operands.
+static unsigned getOpIdxForMO(const MCInst &MI, const MCOperand &MO) {
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ const MCOperand &Op = MI.getOperand(i);
+ if (&Op == &MO)
+ return i;
+ }
+ llvm_unreachable("This operand is not part of this instruction");
+ return ~0U; // Silence any warnings about no return.
+}
+
unsigned PPCMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
@@ -364,14 +268,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- unsigned Reg = MO.getReg();
- unsigned Encode = CTX.getRegisterInfo()->getEncodingValue(Reg);
-
- if ((MCII.get(MI.getOpcode()).TSFlags & PPCII::UseVSXReg))
- if (PPCInstrInfo::isVRRegister(Reg))
- Encode += 32;
-
- return Encode;
+ unsigned OpNo = getOpIdxForMO(MI, MO);
+ unsigned Reg =
+ PPCInstrInfo::getRegNumForOperand(MCII.get(MI.getOpcode()),
+ MO.getReg(), OpNo);
+ return CTX.getRegisterInfo()->getEncodingValue(Reg);
}
assert(MO.isImm() &&
@@ -379,5 +280,42 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return MO.getImm();
}
+void PPCMCCodeEmitter::encodeInstruction(
+ const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+
+ // Output the constant in big/little endian byte order.
+ unsigned Size = getInstSizeInBytes(MI);
+ support::endianness E = IsLittleEndian ? support::little : support::big;
+ switch (Size) {
+ case 0:
+ break;
+ case 4:
+ support::endian::write<uint32_t>(OS, Bits, E);
+ break;
+ case 8:
+ // If we emit a pair of instructions, the first one is
+ // always in the top 32 bits, even on little-endian.
+ support::endian::write<uint32_t>(OS, Bits >> 32, E);
+ support::endian::write<uint32_t>(OS, Bits, E);
+ break;
+ default:
+ llvm_unreachable("Invalid instruction size");
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+// Get the number of bytes used to encode the given MCInst.
+unsigned PPCMCCodeEmitter::getInstSizeInBytes(const MCInst &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ return Desc.getSize();
+}
+
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
new file mode 100644
index 000000000000..a4bcff4b9450
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -0,0 +1,109 @@
+//===-- PPCMCCodeEmitter.h - Convert PPC code to machine code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
+#define LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ const MCInstrInfo &MCII;
+ const MCContext &CTX;
+ bool IsLittleEndian;
+
+public:
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), CTX(ctx),
+ IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
+ void operator=(const PPCMCCodeEmitter &) = delete;
+ ~PPCMCCodeEmitter() override = default;
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ // Get the number of bytes used to encode the given MCInst.
+ unsigned getInstSizeInBytes(const MCInst &MI) const;
+
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
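For context on what a fixed-width emitter like this does with the value returned by getBinaryCodeForInstr(), here is a minimal standalone sketch of emitting one 32-bit instruction word in either byte order. It is an illustration only, not the actual PPCMCCodeEmitter implementation; the constant used is the well-known encoding of mflr 0.

#include <cstdint>
#include <cstdio>

// Write a 32-bit instruction word in the requested byte order, mirroring the
// choice an emitter has to make based on a flag like IsLittleEndian.
static void emitWord(uint32_t Insn, bool IsLittleEndian, uint8_t Out[4]) {
  for (int i = 0; i < 4; ++i) {
    unsigned Shift = IsLittleEndian ? 8 * i : 8 * (3 - i);
    Out[i] = uint8_t(Insn >> Shift);
  }
}

int main() {
  uint8_t Buf[4];
  emitWord(0x7C0802A6 /* mflr 0 */, /*IsLittleEndian=*/false, Buf);
  std::printf("%02x %02x %02x %02x\n", Buf[0], Buf[1], Buf[2], Buf[3]); // 7c 08 02 a6
}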
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 316fd2ccf358..d6e450cba0d7 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -17,6 +17,7 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <memory>
@@ -104,4 +105,63 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
#define GET_SUBTARGETINFO_ENUM
#include "PPCGenSubtargetInfo.inc"
+#define PPC_REGS0_31(X) \
+ { \
+ X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
+ X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
+ X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \
+ }
+
+#define PPC_REGS_NO0_31(Z, X) \
+ { \
+ Z, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
+ X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
+ X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \
+ }
+
+#define PPC_REGS_LO_HI(LO, HI) \
+ { \
+ LO##0, LO##1, LO##2, LO##3, LO##4, LO##5, LO##6, LO##7, LO##8, LO##9, \
+ LO##10, LO##11, LO##12, LO##13, LO##14, LO##15, LO##16, LO##17, \
+ LO##18, LO##19, LO##20, LO##21, LO##22, LO##23, LO##24, LO##25, \
+ LO##26, LO##27, LO##28, LO##29, LO##30, LO##31, HI##0, HI##1, HI##2, \
+ HI##3, HI##4, HI##5, HI##6, HI##7, HI##8, HI##9, HI##10, HI##11, \
+ HI##12, HI##13, HI##14, HI##15, HI##16, HI##17, HI##18, HI##19, \
+ HI##20, HI##21, HI##22, HI##23, HI##24, HI##25, HI##26, HI##27, \
+ HI##28, HI##29, HI##30, HI##31 \
+ }
+
+using llvm::MCPhysReg;
+
+#define DEFINE_PPC_REGCLASSES \
+ static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \
+ static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \
+ static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \
+ static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
+ static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
+ static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
+ static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \
+ static const MCPhysReg RRegsNoR0[32] = \
+ PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
+ static const MCPhysReg XRegsNoX0[32] = \
+ PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \
+ static const MCPhysReg VSRegs[64] = \
+ PPC_REGS_LO_HI(PPC::VSL, PPC::V); \
+ static const MCPhysReg VSFRegs[64] = \
+ PPC_REGS_LO_HI(PPC::F, PPC::VF); \
+ static const MCPhysReg VSSRegs[64] = \
+ PPC_REGS_LO_HI(PPC::F, PPC::VF); \
+ static const MCPhysReg CRBITRegs[32] = { \
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, \
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, \
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, \
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, \
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \
+ static const MCPhysReg CRRegs[8] = { \
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7}
+
#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
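The PPC_REGS0_31 family of macros above simply pastes a register prefix onto the numbers 0-31 to build 32-entry lookup tables, presumably so encoded register numbers can be mapped back to register enumerators. A minimal standalone sketch of the expansion, using a stand-in enum instead of the generated PPC register enumerators:

#include <cstdint>
#include <cstdio>

using MCPhysReg = uint16_t;

namespace PPC {
// Stand-in enumerators; in LLVM these come from PPCGenRegisterInfo.inc.
enum {
  R0 = 1, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15,
  R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30,
  R31
};
} // namespace PPC

#define PPC_REGS0_31(X)                                                        \
  {                                                                            \
    X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11,  \
    X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21,      \
    X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31       \
  }

// The first declaration DEFINE_PPC_REGCLASSES expands to: a 32-entry array
// of the R0..R31 enumerators.
static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R);

int main() {
  std::printf("RRegs[5] == PPC::R5: %s\n", RRegs[5] == PPC::R5 ? "yes" : "no");
}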
diff --git a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
index c6cbb9037ede..17c37964c562 100644
--- a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -111,11 +111,11 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
(instregex "POPCNT(D|W)$"),
(instregex "CMPB(8)?$"),
+ (instregex "SETB(8)?$"),
XSTDIVDP,
XSTSQRTDP,
XSXSIGDP,
XSCVSPDPN,
- SETB,
BPERMD
)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 80ad4962a20f..98e6e98e6974 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -305,11 +305,11 @@ def : Processor<"generic", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureICBT, FeatureBookE,
+ FeatureICBT, FeatureBookE,
FeatureMSYNC, FeatureMFTB]>;
def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureICBT, FeatureBookE,
+ FeatureICBT, FeatureBookE,
FeatureMSYNC, FeatureMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601, FeatureFPU]>;
def : Processor<"602", G3Itineraries, [Directive602, FeatureFPU,
@@ -348,7 +348,7 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE,
FeatureMFTB]>;
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
- FeatureFRES, FeatureFRSQRTE,
+ FeatureFRES, FeatureFRSQRTE,
FeatureMFTB]>;
def : ProcessorModel<"970", G5Model,
@@ -369,11 +369,11 @@ def : ProcessorModel<"e500", PPCE500Model,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc,
- FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
- FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"a2", PPCA2Model,
[DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
@@ -428,7 +428,7 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
-def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
+def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat,
@@ -478,3 +478,9 @@ def PPC : Target {
let AssemblyParserVariants = [PPCAsmParserVariant];
let AllowRegisterRenaming = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Pfm Counters
+//===----------------------------------------------------------------------===//
+
+include "PPCPfmCounters.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a9da64cc216f..04aa3c9b1e22 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -158,23 +158,6 @@ public:
} // end anonymous namespace
-/// stripRegisterPrefix - This method strips the character prefix from a
-/// register name so that only the number is left. Used by for linux asm.
-static const char *stripRegisterPrefix(const char *RegName) {
- switch (RegName[0]) {
- case 'r':
- case 'f':
- case 'q': // for QPX
- case 'v':
- if (RegName[1] == 's')
- return RegName + 2;
- return RegName + 1;
- case 'c': if (RegName[1] == 'r') return RegName + 2;
- }
-
- return RegName;
-}
-
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const DataLayout &DL = getDataLayout();
@@ -182,27 +165,15 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
switch (MO.getType()) {
case MachineOperand::MO_Register: {
- unsigned Reg = MO.getReg();
-
- // There are VSX instructions that use VSX register numbering (vs0 - vs63)
- // as well as those that use VMX register numbering (v0 - v31 which
- // correspond to vs32 - vs63). If we have an instruction that uses VSX
- // numbering, we need to convert the VMX registers to VSX registers.
- // Namely, we print 32-63 when the instruction operates on one of the
- // VMX registers.
- // (Please synchronize with PPCInstPrinter::printOperand)
- if (MI->getDesc().TSFlags & PPCII::UseVSXReg) {
- if (PPCInstrInfo::isVRRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (PPCInstrInfo::isVFRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::VF0);
- }
+ unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(),
+ MO.getReg(), OpNo);
+
const char *RegName = PPCInstPrinter::getRegisterName(Reg);
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
if (!Subtarget->isDarwin())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName;
return;
}
@@ -279,6 +250,21 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (MI->getOperand(OpNo).isImm())
O << "i";
return false;
+ case 'x':
+ if (!MI->getOperand(OpNo).isReg())
+ return true;
+ // This operand uses VSX numbering.
+ // If the operand is a VMX register, convert it to a VSX register.
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ if (PPCInstrInfo::isVRRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::V0);
+ else if (PPCInstrInfo::isVFRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ const char *RegName;
+ RegName = PPCInstPrinter::getRegisterName(Reg);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
+ O << RegName;
+ return false;
}
}
@@ -303,7 +289,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
{
const char *RegName = "r0";
if (!Subtarget->isDarwin())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName << ", ";
printOperand(MI, OpNo, O);
return false;
@@ -341,7 +327,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
}
void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
}
void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
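The new 'x' inline-asm modifier above leans on the fact, spelled out in the removed comment, that the VMX registers v0-v31 overlap the VSX registers vs32-vs63, and that the Linux assembler takes bare numbers rather than register mnemonics. A standalone sketch of the resulting operand text, with the arithmetic written out directly instead of via the PPC:: enumerators:

#include <cstdio>
#include <string>

// v0..v31 occupy the same registers as vs32..vs63, so an operand printed
// with the 'x' modifier becomes "vs(32+N)"; stripping the "vs" prefix, in
// the spirit of PPCRegisterInfo::stripRegisterPrefix, leaves the bare number.
static std::string vmxToVsxOperand(unsigned VmxIndex) {
  unsigned VsxIndex = 32 + VmxIndex;
  std::string Name = "vs" + std::to_string(VsxIndex);
  return Name.substr(2);
}

int main() {
  std::printf("%%x of v2 prints as: %s\n", vmxToVsxOperand(2).c_str()); // 34
}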
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 12c581023234..22842d516e7d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -338,7 +338,7 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
// coldcc calling convection marks most registers as non-volatile.
// Do not include r1 since the stack pointer is never considered a CSR.
// Do not include r2, since it is the TOC register and is added depending
-// on wether or not the function uses the TOC and is a non-leaf.
+// on whether or not the function uses the TOC and is a non-leaf.
// Do not include r0,r11,r13 as they are optional in functional linkage
// and value may be altered by inter-library calls.
// Do not include r12 as it is used as a scratch register.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
index fe41e1b36a5d..a03e691ef5bb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -392,7 +392,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
// liveness state at the end of MBB (liveOut of MBB) as the liveIn for
// NewSuccessor. Otherwise, will cause cyclic dependence.
LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
- SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 2> Clobbers;
for (MachineInstr &MI : *MBB)
LPR.stepForward(MI, Clobbers);
for (auto &LI : LPR)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index f212894035db..3b2d92db78b9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -861,8 +861,20 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
}
}
+ unsigned SrcReg1 = getRegForValue(SrcValue1);
+ if (SrcReg1 == 0)
+ return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(SrcValue2);
+ if (SrcReg2 == 0)
+ return false;
+ }
+
unsigned CmpOpc;
bool NeedsExt = false;
+ auto RC = MRI.getRegClass(SrcReg1);
switch (SrcVT.SimpleTy) {
default: return false;
case MVT::f32:
@@ -879,8 +891,15 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
CmpOpc = PPC::EFSCMPGT;
break;
}
- } else
+ } else {
CmpOpc = PPC::FCMPUS;
+ if (isVSSRCRegClass(RC)) {
+ unsigned TmpReg = createResultReg(&PPC::F4RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg1);
+ SrcReg1 = TmpReg;
+ }
+ }
break;
case MVT::f64:
if (HasSPE) {
@@ -896,14 +915,17 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
CmpOpc = PPC::EFDCMPGT;
break;
}
- } else
+ } else if (isVSFRCRegClass(RC)) {
+ CmpOpc = PPC::XSCMPUDP;
+ } else {
CmpOpc = PPC::FCMPUD;
+ }
break;
case MVT::i1:
case MVT::i8:
case MVT::i16:
NeedsExt = true;
- // Intentional fall-through.
+ LLVM_FALLTHROUGH;
case MVT::i32:
if (!UseImm)
CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
@@ -918,17 +940,6 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
break;
}
- unsigned SrcReg1 = getRegForValue(SrcValue1);
- if (SrcReg1 == 0)
- return false;
-
- unsigned SrcReg2 = 0;
- if (!UseImm) {
- SrcReg2 = getRegForValue(SrcValue2);
- if (SrcReg2 == 0)
- return false;
- }
-
if (NeedsExt) {
unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
@@ -2354,7 +2365,8 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
return true;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 84dacf396462..8263954994d2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -17,6 +17,7 @@
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,6 +29,16 @@
using namespace llvm;
+#define DEBUG_TYPE "framelowering"
+STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
+STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
+STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+
+static cl::opt<bool>
+EnablePEVectorSpills("ppc-enable-pe-vector-spills",
+ cl::desc("Enable spills in prologue to vector registers."),
+ cl::init(false), cl::Hidden);
+
/// VRRegNo - Map from a numbered VR register to its enum value.
///
static const MCPhysReg VRRegNo[] = {
@@ -466,6 +477,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Check whether we can skip adjusting the stack pointer (by using red zone)
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
+ NumNoNeedForFrame++;
// No need for frame
if (UpdateMF)
MFI.setStackSize(0);
@@ -1213,11 +1225,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
continue;
}
- int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (CSI[I].isSpilledToReg()) {
+ unsigned SpilledReg = CSI[I].getDstReg();
+ unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
+ nullptr, MRI->getDwarfRegNum(Reg, true),
+ MRI->getDwarfRegNum(SpilledReg, true)));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIRegister);
+ } else {
+ int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
}
}
}
@@ -1822,17 +1843,19 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// Move general register save area spill slots down, taking into account
// the size of the Floating-point register save area.
for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
- int FI = GPRegs[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ if (!GPRegs[i].isSpilledToReg()) {
+ int FI = GPRegs[i].getFrameIdx();
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ }
}
// Move general register save area spill slots down, taking into account
// the size of the Floating-point register save area.
for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
- int FI = G8Regs[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ if (!G8Regs[i].isSpilledToReg()) {
+ int FI = G8Regs[i].getFrameIdx();
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ }
}
unsigned MinReg =
@@ -1947,6 +1970,64 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
}
}
+// This function checks if a callee-saved GPR can be spilled to a volatile
+// vector register. This occurs for leaf functions when the option
+// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
+// that were not spilled to vectors, return false so the target-independent
+// code can handle them by assigning a FrameIdx to a stack slot.
+bool PPCFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+
+ if (CSI.empty())
+ return true; // Early exit if no callee-saved registers are modified!
+
+ // Early exit if we cannot spill GPRs to volatile vector registers.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
+ return false;
+
+ // Build a BitVector of VSRs that can be used for spilling GPRs.
+ BitVector BVAllocatable = TRI->getAllocatableSet(MF);
+ BitVector BVCalleeSaved(TRI->getNumRegs());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ BVCalleeSaved.set(CSRegs[i]);
+
+ for (unsigned Reg : BVAllocatable.set_bits()) {
+ // Set to 0 if the register is not a volatile VF/F8 register, or if it is
+ // used in the function.
+ if (BVCalleeSaved[Reg] ||
+ (!PPC::F8RCRegClass.contains(Reg) &&
+ !PPC::VFRCRegClass.contains(Reg)) ||
+ (MF.getRegInfo().isPhysRegUsed(Reg)))
+ BVAllocatable.reset(Reg);
+ }
+
+ bool AllSpilledToReg = true;
+ for (auto &CS : CSI) {
+ if (BVAllocatable.none())
+ return false;
+
+ unsigned Reg = CS.getReg();
+ if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
+ AllSpilledToReg = false;
+ continue;
+ }
+
+ unsigned VolatileVFReg = BVAllocatable.find_first();
+ if (VolatileVFReg < BVAllocatable.size()) {
+ CS.setDstReg(VolatileVFReg);
+ BVAllocatable.reset(VolatileVFReg);
+ } else {
+ AllSpilledToReg = false;
+ }
+ }
+ return AllSpilledToReg;
+}
+
+
bool
PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -2012,12 +2093,18 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CSI[i].getFrameIdx()));
}
} else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- // Use !IsLiveIn for the kill flag.
- // We do not want to kill registers that are live in this function
- // before their use because they will become undefined registers.
- TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
- CSI[i].getFrameIdx(), RC, TRI);
+ if (CSI[i].isSpilledToReg()) {
+ NumPESpillVSR++;
+ BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
+ .addReg(Reg, getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ // Use !IsLiveIn for the kill flag.
+ // We do not want to kill registers that are live in this function
+ // before their use because they will become undefined registers.
+ TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
}
}
return true;
@@ -2157,13 +2244,19 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
CR2Spilled = CR3Spilled = CR4Spilled = false;
}
- // Default behavior for non-CR saves.
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
- RC, TRI);
- assert(I != MBB.begin() &&
- "loadRegFromStackSlot didn't insert any code!");
+ if (CSI[i].isSpilledToReg()) {
+ DebugLoc DL;
+ NumPEReloadVSR++;
+ BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
+ .addReg(CSI[i].getDstReg(), getKillRegState(true));
+ } else {
+ // Default behavior for non-CR saves.
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ assert(I != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
}
+ }
// Insert in reverse order.
if (AtStart)
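As a rough illustration of the policy in assignCalleeSavedSpillSlots above, here is a minimal standalone sketch under the simplifying assumption that every entry in the list is a GPR; the register numbers and helper names are made up for the example, not taken from the patch.

#include <cstdio>
#include <vector>

struct SavedReg {
  int Reg;    // a callee-saved GPR
  int DstVSR; // -1 means "spill to a stack slot as usual"
};

// Give each callee-saved GPR a distinct unused volatile VSR; the prologue
// then saves it with mtvsrd and the epilogue restores it with mfvsrd.
// Returning false lets the generic stack-slot path handle what is left.
static bool assignToVectors(std::vector<SavedReg> &CSI,
                            std::vector<int> FreeVolatileVSRs) {
  for (SavedReg &CS : CSI) {
    if (FreeVolatileVSRs.empty())
      return false;
    CS.DstVSR = FreeVolatileVSRs.back();
    FreeVolatileVSRs.pop_back(); // each VSR shelters exactly one GPR
  }
  return true;
}

int main() {
  std::vector<SavedReg> CSI = {{30, -1}, {31, -1}};
  std::printf("all spilled to VSRs: %d\n", assignToVectors(CSI, {60, 61}) ? 1 : 0);
}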
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 01c155594c44..69bd1484d6e5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -99,6 +99,13 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override;
+ /// This function will assign callee-saved GPRs to volatile vector registers
+ /// for prologue spills when applicable. It returns false if there are any
+ /// registers that were not spilled to volatile vector registers.
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 793a4dd7f624..5f6966cecd61 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -103,7 +103,7 @@ bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
case PPC::Sched::IIC_LdStLHA:
case PPC::Sched::IIC_LdStLHAU:
case PPC::Sched::IIC_LdStLWA:
- case PPC::Sched::IIC_LdStSTDU:
+ case PPC::Sched::IIC_LdStSTU:
case PPC::Sched::IIC_LdStSTFDU:
NSlots = 2;
break;
@@ -112,7 +112,7 @@ bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
case PPC::Sched::IIC_LdStLHAUX:
case PPC::Sched::IIC_LdStLWARX:
case PPC::Sched::IIC_LdStLDARX:
- case PPC::Sched::IIC_LdStSTDUX:
+ case PPC::Sched::IIC_LdStSTUX:
case PPC::Sched::IIC_LdStSTDCX:
case PPC::Sched::IIC_LdStSTWCX:
case PPC::Sched::IIC_BrMCRX: // mtcr
@@ -180,9 +180,8 @@ void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
CurGroup.clear();
CurSlots = CurBranches = 0;
} else {
- LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << SU->NodeNum
- << "): ");
- LLVM_DEBUG(DAG->dumpNode(SU));
+ LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: ");
+ LLVM_DEBUG(DAG->dumpNode(*SU));
unsigned NSlots;
bool MustBeFirst = mustComeFirst(MCID, NSlots);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6cec664d1e66..31acd0ff870f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -81,6 +81,8 @@ STATISTIC(NumLogicOpsOnComparison,
"Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
"Number of compares not eliminated as they have non-extending uses.");
+STATISTIC(NumP9Setb,
+ "Number of compares lowered to setb.");
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
@@ -327,7 +329,6 @@ private:
bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
void transferMemOperands(SDNode *N, SDNode *Result);
- MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr);
};
} // end anonymous namespace
@@ -490,7 +491,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
- const TerminatorInst *BBTerm = BB->getTerminator();
+ const Instruction *BBTerm = BB->getTerminator();
if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
@@ -687,9 +688,8 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
- KnownBits LKnown, RKnown;
- CurDAG->computeKnownBits(Op0, LKnown);
- CurDAG->computeKnownBits(Op1, RKnown);
+ KnownBits LKnown = CurDAG->computeKnownBits(Op0);
+ KnownBits RKnown = CurDAG->computeKnownBits(Op1);
unsigned TargetMask = LKnown.Zero.getZExtValue();
unsigned InsertMask = RKnown.Zero.getZExtValue();
@@ -733,8 +733,7 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
// The AND mask might not be a constant, and we need to make sure that
// if we're going to fold the masking with the insert, all bits not
// know to be zero in the mask are known to be one.
- KnownBits MKnown;
- CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
+ KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
unsigned SHOpc = Op1.getOperand(0).getOpcode();
@@ -1083,9 +1082,14 @@ class BitPermutationSelector {
// lowest-order bit.
unsigned Idx;
+ // ConstZero means a bit we need to mask off.
+ // Variable is a bit that comes from an input variable.
+ // VariableKnownToBeZero is also a bit that comes from an input variable,
+ // but it is already known to be zero, so we do not need to mask it.
enum Kind {
ConstZero,
- Variable
+ Variable,
+ VariableKnownToBeZero
} K;
ValueBit(SDValue V, unsigned I, Kind K = Variable)
@@ -1094,11 +1098,11 @@ class BitPermutationSelector {
: V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
bool isZero() const {
- return K == ConstZero;
+ return K == ConstZero || K == VariableKnownToBeZero;
}
bool hasValue() const {
- return K == Variable;
+ return K == Variable || K == VariableKnownToBeZero;
}
SDValue getValue() const {
@@ -1248,8 +1252,14 @@ class BitPermutationSelector {
for (unsigned i = 0; i < NumBits; ++i)
if (((Mask >> i) & 1) == 1)
Bits[i] = (*LHSBits)[i];
- else
- Bits[i] = ValueBit(ValueBit::ConstZero);
+ else {
+ // The AND instruction masks this bit. If the input is already zero,
+ // we have nothing to do here. Otherwise, make the bit ConstZero.
+ if ((*LHSBits)[i].isZero())
+ Bits[i] = (*LHSBits)[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+ }
return std::make_pair(Interesting, &Bits);
}
@@ -1259,8 +1269,26 @@ class BitPermutationSelector {
const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
bool AllDisjoint = true;
- for (unsigned i = 0; i < NumBits; ++i)
- if (LHSBits[i].isZero())
+ SDValue LastVal = SDValue();
+ unsigned LastIdx = 0;
+ for (unsigned i = 0; i < NumBits; ++i) {
+ if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
+ // If both inputs are known to be zero and one is ConstZero and
+ // the other is VariableKnownToBeZero, we can select whichever
+ // we like. To minimize the number of bit groups, we select
+ // VariableKnownToBeZero if this bit is the next bit of the same
+ // input variable as the previous bit. Otherwise, we select
+ // ConstZero.
+ if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
+ LHSBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = LHSBits[i];
+ else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
+ RHSBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = RHSBits[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+ }
+ else if (LHSBits[i].isZero())
Bits[i] = RHSBits[i];
else if (RHSBits[i].isZero())
Bits[i] = LHSBits[i];
@@ -1268,6 +1296,16 @@ class BitPermutationSelector {
AllDisjoint = false;
break;
}
+ // We remember the value and bit index of this bit.
+ if (Bits[i].hasValue()) {
+ LastVal = Bits[i].getValue();
+ LastIdx = Bits[i].getValueBitIndex();
+ }
+ else {
+ if (LastVal) LastVal = SDValue();
+ LastIdx = 0;
+ }
+ }
if (!AllDisjoint)
break;
@@ -1293,6 +1331,72 @@ class BitPermutationSelector {
return std::make_pair(Interesting, &Bits);
}
+ case ISD::TRUNCATE: {
+ EVT FromType = V.getOperand(0).getValueType();
+ EVT ToType = V.getValueType();
+ // We support only the case with truncate from i64 to i32.
+ if (FromType != MVT::i64 || ToType != MVT::i32)
+ break;
+ const unsigned NumAllBits = FromType.getSizeInBits();
+ SmallVector<ValueBit, 64> *InBits;
+ std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
+ NumAllBits);
+ const unsigned NumValidBits = ToType.getSizeInBits();
+
+ // A 32-bit instruction cannot touch the upper 32-bit part of a 64-bit value,
+ // so we cannot include this truncate.
+ bool UseUpper32bit = false;
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
+ UseUpper32bit = true;
+ break;
+ }
+ if (UseUpper32bit)
+ break;
+
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*InBits)[i];
+
+ return std::make_pair(Interesting, &Bits);
+ }
+ case ISD::AssertZext: {
+ // For AssertZext, we look through the operand and
+ // mark the bits known to be zero.
+ const SmallVector<ValueBit, 64> *LHSBits;
+ std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
+ NumBits);
+
+ EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
+ const unsigned NumValidBits = FromType.getSizeInBits();
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*LHSBits)[i];
+
+ // These bits are known to be zero.
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit((*LHSBits)[i].getValue(),
+ (*LHSBits)[i].getValueBitIndex(),
+ ValueBit::VariableKnownToBeZero);
+
+ return std::make_pair(Interesting, &Bits);
+ }
+ case ISD::LOAD:
+ LoadSDNode *LD = cast<LoadSDNode>(V);
+ if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
+ EVT VT = LD->getMemoryVT();
+ const unsigned NumValidBits = VT.getSizeInBits();
+
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = ValueBit(V, i);
+
+ // These bits are known to be zero.
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
+
+ // A zero-extending load itself cannot be optimized, so it is not
+ // interesting by itself, though it provides useful information.
+ return std::make_pair(Interesting = false, &Bits);
+ }
+ break;
}
for (unsigned i = 0; i < NumBits; ++i)
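The VariableKnownToBeZero kind, together with the new AssertZext and zero-extending-load cases above, is aimed at bits that an AND would clear but that are already known to be zero. A hypothetical source-level shape of that kind (illustrative only; the function is not taken from the patch or its tests):

#include <cstdint>
#include <cstdio>

// The zero-extending i16 load already clears bits 16..63 of 'lo', so the
// explicit mask adds no information. Tracking those bits as
// VariableKnownToBeZero lets the selector keep them associated with the
// load value instead of forcing a separate ConstZero group and extra mask.
uint64_t pack(const uint16_t *p, uint64_t hi) {
  uint64_t lo = p[0];
  return (hi << 16) | (lo & 0xFFFF);
}

int main() {
  uint16_t v = 0xBEEF;
  std::printf("%llx\n", (unsigned long long)pack(&v, 0xDEAD)); // deadbeef
}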
@@ -1304,7 +1408,7 @@ class BitPermutationSelector {
// For each value (except the constant ones), compute the left-rotate amount
// to get it from its original to final position.
void computeRotationAmounts() {
- HasZeros = false;
+ NeedMask = false;
RLAmt.resize(Bits.size());
for (unsigned i = 0; i < Bits.size(); ++i)
if (Bits[i].hasValue()) {
@@ -1314,7 +1418,7 @@ class BitPermutationSelector {
else
RLAmt[i] = Bits.size() - (VBI - i);
} else if (Bits[i].isZero()) {
- HasZeros = true;
+ NeedMask = true;
RLAmt[i] = UINT32_MAX;
} else {
llvm_unreachable("Unknown value bit type");
@@ -1330,6 +1434,7 @@ class BitPermutationSelector {
unsigned LastRLAmt = RLAmt[0];
SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
unsigned LastGroupStartIdx = 0;
+ bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
for (unsigned i = 1; i < Bits.size(); ++i) {
unsigned ThisRLAmt = RLAmt[i];
SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
@@ -1342,10 +1447,20 @@ class BitPermutationSelector {
LastGroupStartIdx = 0;
}
+ // If this bit is known to be zero and the current group is a bit group
+ // of zeros, we do not need to terminate the current bit group even if the
+ // Value or RLAmt does not match here. Instead, we terminate this group
+ // when the first non-zero bit appears later.
+ if (IsGroupOfZeros && Bits[i].isZero())
+ continue;
+
// If this bit has the same underlying value and the same rotate factor as
// the last one, then they're part of the same group.
if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
- continue;
+ // We cannot continue the current group if this bit is not known to
+ // be zero in a bit group of zeros.
+ if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
+ continue;
if (LastValue.getNode())
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1353,6 +1468,7 @@ class BitPermutationSelector {
LastRLAmt = ThisRLAmt;
LastValue = ThisValue;
LastGroupStartIdx = i;
+ IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
}
if (LastValue.getNode())
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1401,7 +1517,7 @@ class BitPermutationSelector {
for (auto &I : ValueRots) {
ValueRotsVec.push_back(I.second);
}
- llvm::sort(ValueRotsVec.begin(), ValueRotsVec.end());
+ llvm::sort(ValueRotsVec);
}
// In 64-bit mode, rlwinm and friends have a rotation operator that
@@ -1588,6 +1704,17 @@ class BitPermutationSelector {
return ExtVal;
}
+ SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
+ if (V.getValueSizeInBits() == 32)
+ return V;
+
+ assert(V.getValueSizeInBits() == 64);
+ SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+ SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
+ MVT::i32, V, SubRegIdx), 0);
+ return SubVal;
+ }
+
// Depending on the number of groups for a particular value, it might be
// better to rotate, mask explicitly (using andi/andis), and then or the
// result. Select this part of the result first.
@@ -1646,12 +1773,12 @@ class BitPermutationSelector {
SDValue VRot;
if (VRI.RLAmt) {
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
- getI32Imm(31, dl) };
+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+ getI32Imm(0, dl), getI32Imm(31, dl) };
VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
Ops), 0);
} else {
- VRot = VRI.V;
+ VRot = TruncateToInt32(VRI.V, dl);
}
SDValue ANDIVal, ANDISVal;
@@ -1698,17 +1825,17 @@ class BitPermutationSelector {
// If we've not yet selected a 'starting' instruction, and we have no zeros
// to fill in, select the (Value, RLAmt) with the highest priority (largest
// number of groups), and start with this rotated value.
- if ((!HasZeros || LateMask) && !Res) {
+ if ((!NeedMask || LateMask) && !Res) {
ValueRotInfo &VRI = ValueRotsVec[0];
if (VRI.RLAmt) {
if (InstCnt) *InstCnt += 1;
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
- getI32Imm(31, dl) };
+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+ getI32Imm(0, dl), getI32Imm(31, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
0);
} else {
- Res = VRI.V;
+ Res = TruncateToInt32(VRI.V, dl);
}
// Now, remove all groups with this underlying value and rotation factor.
@@ -1723,13 +1850,13 @@ class BitPermutationSelector {
for (auto &BG : BitGroups) {
if (!Res) {
SDValue Ops[] =
- { BG.V, getI32Imm(BG.RLAmt, dl),
+ { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
} else {
SDValue Ops[] =
- { Res, BG.V, getI32Imm(BG.RLAmt, dl),
+ { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
@@ -2077,7 +2204,7 @@ class BitPermutationSelector {
// If we've not yet selected a 'starting' instruction, and we have no zeros
// to fill in, select the (Value, RLAmt) with the highest priority (largest
// number of groups), and start with this rotated value.
- if ((!HasZeros || LateMask) && !Res) {
+ if ((!NeedMask || LateMask) && !Res) {
// If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
// groups will come first, and so the VRI representing the largest number
// of groups might not be first (it might be the first Repl32 groups).
@@ -2230,7 +2357,7 @@ class BitPermutationSelector {
SmallVector<ValueBit, 64> Bits;
- bool HasZeros;
+ bool NeedMask;
SmallVector<unsigned, 64> RLAmt;
SmallVector<BitGroup, 16> BitGroups;
@@ -2259,10 +2386,10 @@ public:
" selection for: ");
LLVM_DEBUG(N->dump(CurDAG));
- // Fill it RLAmt and set HasZeros.
+ // Fill in RLAmt and set NeedMask.
computeRotationAmounts();
- if (!HasZeros)
+ if (!NeedMask)
return Select(N, false);
// We currently have two techniques for handling results with zeros: early
@@ -4045,54 +4172,148 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
-/// This method returns a node after flipping the MSB of each element
-/// of vector integer type. Additionally, if SignBitVec is non-null,
-/// this method sets a node with one at MSB of all elements
-/// and zero at other bits in SignBitVec.
-MachineSDNode *
-PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) {
- SDLoc dl(N);
- EVT VecVT = N.getValueType();
- if (VecVT == MVT::v4i32) {
- if (SignBitVec) {
- SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32);
- *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT,
- SDValue(ZV, 0));
- }
- return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N);
- }
- else if (VecVT == MVT::v8i16) {
- SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32,
- getI32Imm(0x8000, dl));
- SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32,
- SDValue(Hi, 0),
- getI32Imm(0x8000, dl));
- SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,
- SDValue(ScaImm, 0));
- /*
- Alternatively, we can do this as follow to use VRF instead of GPR.
- vspltish 5, 1
- vspltish 6, 15
- vslh 5, 6, 5
- */
- if (SignBitVec) *SignBitVec = VecImm;
- return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N,
- SDValue(VecImm, 0));
- }
- else if (VecVT == MVT::v16i8) {
- SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32,
- getI32Imm(0x80, dl));
- if (SignBitVec) *SignBitVec = VecImm;
- return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N,
- SDValue(VecImm, 0));
+static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
+ bool &NeedSwapOps, bool &IsUnCmp) {
+
+ assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue TrueRes = N->getOperand(2);
+ SDValue FalseRes = N->getOperand(3);
+ ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
+ if (!TrueConst)
+ return false;
+
+ assert((N->getSimpleValueType(0) == MVT::i64 ||
+ N->getSimpleValueType(0) == MVT::i32) &&
+ "Expecting either i64 or i32 here.");
+
+ // We are looking for any of:
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
+ int64_t TrueResVal = TrueConst->getSExtValue();
+ if ((TrueResVal < -1 || TrueResVal > 1) ||
+ (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
+ (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
+ (TrueResVal == 0 &&
+ (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
+ return false;
+
+ bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
+ SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
+ if (SetOrSelCC.getOpcode() != ISD::SETCC &&
+ SetOrSelCC.getOpcode() != ISD::SELECT_CC)
+ return false;
+
+ // Without this setb optimization, the outer SELECT_CC is manually selected
+ // to a SELECT_CC_I4/SELECT_CC_I8 pseudo, which the expand-isel-pseudos pass
+ // then turns into an isel instruction. When the result has more than one
+ // use (e.g. both a zext and a sext), this optimization merely replaces the
+ // isel with a setb without any significant gain. Since setb has a longer
+ // latency than the original isel, we should avoid that. Another point is
+ // that setb requires the comparison to be kept, which can block a future
+ // opportunity to eliminate the comparison altogether.
+ if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
+ return false;
+
+ SDValue InnerLHS = SetOrSelCC.getOperand(0);
+ SDValue InnerRHS = SetOrSelCC.getOperand(1);
+ ISD::CondCode InnerCC =
+ cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
+ // If the inner comparison is a select_cc, make sure the true/false values are
+ // 1/-1 and canonicalize it if needed.
+ if (InnerIsSel) {
+ ConstantSDNode *SelCCTrueConst =
+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
+ ConstantSDNode *SelCCFalseConst =
+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
+ if (!SelCCTrueConst || !SelCCFalseConst)
+ return false;
+ int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
+ int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
+ // The values must be -1/1 (requiring a swap) or 1/-1.
+ if (SelCCTVal == -1 && SelCCFVal == 1) {
+ std::swap(InnerLHS, InnerRHS);
+ } else if (SelCCTVal != 1 || SelCCFVal != -1)
+ return false;
}
- else
- llvm_unreachable("Unsupported vector data type for flipSignBit");
+
+ // Canonicalize unsigned case
+ if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
+ IsUnCmp = true;
+ InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
+ }
+
+ bool InnerSwapped = false;
+ if (LHS == InnerRHS && RHS == InnerLHS)
+ InnerSwapped = true;
+ else if (LHS != InnerLHS || RHS != InnerRHS)
+ return false;
+
+ switch (CC) {
+ // (select_cc lhs, rhs, 0, \
+ // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
+ case ISD::SETEQ:
+ if (!InnerIsSel)
+ return false;
+ if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
+ return false;
+ NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
+ break;
+
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
+ case ISD::SETULT:
+ if (!IsUnCmp && InnerCC != ISD::SETNE)
+ return false;
+ IsUnCmp = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETLT:
+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
+ (InnerCC == ISD::SETLT && InnerSwapped))
+ NeedSwapOps = (TrueResVal == 1);
+ else
+ return false;
+ break;
+
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
+ case ISD::SETUGT:
+ if (!IsUnCmp && InnerCC != ISD::SETNE)
+ return false;
+ IsUnCmp = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETGT:
+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
+ (InnerCC == ISD::SETGT && InnerSwapped))
+ NeedSwapOps = (TrueResVal == -1);
+ else
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
+ LLVM_DEBUG(N->dump());
+
+ return true;
}
// Select - Convert the specified operand from a target-independent to a
@@ -4429,8 +4650,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
- KnownBits LHSKnown;
- CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
+ KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
// If this is equivalent to an add, then we can fold it with the
// FrameIndex calculation.
@@ -4557,6 +4777,31 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
N->getOperand(0).getValueType() == MVT::i1)
break;
+ if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
+ bool NeedSwapOps = false;
+ bool IsUnCmp = false;
+ if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (NeedSwapOps)
+ std::swap(LHS, RHS);
+
+ // Make use of SelectCC to generate the comparison that sets the CR bits.
+ // For equality comparisons with one literal operand, SelectCC may avoid
+ // materializing the whole literal and instead use xoris to check it first,
+ // in which case the resulting comparison cannot exactly represent the
+ // GT/LT relationship. To avoid this we specify SETGT/SETUGT here instead
+ // of SETEQ.
+ SDValue GenCC =
+ SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
+ CurDAG->SelectNodeTo(
+ N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
+ N->getValueType(0), GenCC);
+ NumP9Setb++;
+ return;
+ }
+ }
+
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
if (!isPPC64)
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
@@ -4648,14 +4893,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
return;
}
- case ISD::VSELECT:
- if (PPCSubTarget->hasVSX()) {
- SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
- CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
- return;
- }
- break;
-
case ISD::VECTOR_SHUFFLE:
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
@@ -4683,11 +4920,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
+ MachineMemOperand *MemOp = LD->getMemOperand();
SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
N->getValueType(0), Ops);
- cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
return;
}
}
@@ -4753,6 +4989,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
}
+ // A signed comparison of i1 values produces the opposite result to an
+ // unsigned one if the condition code includes less-than or greater-than.
+ // This is because 1 is the most negative signed i1 number and the most
+ // positive unsigned i1 number. The CR-logical operations used for such
+ // comparisons are non-commutative so for signed comparisons vs. unsigned
+ // ones, the input operands just need to be swapped.
+ if (ISD::isSignedIntSetCC(CC))
+ Swap = !Swap;
+
SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
N->getOperand(Swap ? 3 : 2),
N->getOperand(Swap ? 2 : 3)), 0);
@@ -4809,9 +5054,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
-
- if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
- CModel == CodeModel::Large) {
+ if (PPCLowering->isAccessedAsGotIndirect(GA)) {
+ // If it is accessed as GOT-indirect, we need an extra LD to load
+ // the address.
SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
transferMemOperands(N, MN);
@@ -4819,18 +5064,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
- const GlobalValue *GV = G->getGlobal();
- unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
- if (GVFlags & PPCII::MO_NLP_FLAG) {
- SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
- transferMemOperands(N, MN);
- ReplaceNode(N, MN);
- return;
- }
- }
-
+ // Build the address relative to the TOC pointer.
ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA));
return;
@@ -4916,55 +5150,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
}
- case ISD::ABS: {
- assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector");
-
- // For vector absolute difference, we use VABSDUW instruction of POWER9.
- // Since VABSDU instructions are for unsigned integers, we need adjustment
- // for signed integers.
- // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).
- // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
- // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).
- EVT VecVT = N->getOperand(0).getValueType();
- SDNode *AbsOp = nullptr;
- unsigned AbsOpcode;
-
- if (VecVT == MVT::v4i32)
- AbsOpcode = PPC::VABSDUW;
- else if (VecVT == MVT::v8i16)
- AbsOpcode = PPC::VABSDUH;
- else if (VecVT == MVT::v16i8)
- AbsOpcode = PPC::VABSDUB;
- else
- llvm_unreachable("Unsupported vector data type for ISD::ABS");
-
- // Even for signed integers, we can skip adjustment if all values are
- // known to be positive (as signed integer) due to zero-extended inputs.
- if (N->getOperand(0).getOpcode() == ISD::SUB &&
- N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) {
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
- SDValue(N->getOperand(0)->getOperand(0)),
- SDValue(N->getOperand(0)->getOperand(1)));
- ReplaceNode(N, AbsOp);
- return;
- }
- if (N->getOperand(0).getOpcode() == ISD::SUB) {
- SDValue SubVal = N->getOperand(0);
- SDNode *Op0 = flipSignBit(SubVal->getOperand(0));
- SDNode *Op1 = flipSignBit(SubVal->getOperand(1));
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
- SDValue(Op0, 0), SDValue(Op1, 0));
- }
- else {
- SDNode *Op1 = nullptr;
- SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1);
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0),
- SDValue(Op1, 0));
- }
- ReplaceNode(N, AbsOp);
- return;
- }
}
SelectCode(N);
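The mayUseP9Setb matching added above recognizes nested select_cc patterns whose result is -1/0/1, which is exactly what a three-way compare produces. A hypothetical C++ shape of that kind (an illustration of the idea, not a case taken from the patch or its tests); on a 64-bit POWER9 build such a comparison may now be selected as a compare followed by a single setb instead of a chain of isel instructions:

#include <cstdio>

// Classic three-way compare: returns -1, 0 or 1.
static long cmp3(long a, long b) {
  return (a < b) ? -1 : (a > b) ? 1 : 0;
}

int main() {
  std::printf("%ld %ld %ld\n", cmp3(1, 2), cmp3(2, 2), cmp3(3, 2)); // -1 0 1
}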
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b5bdf47ce37a..39608cb74bee 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -251,12 +251,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i64, Expand);
}
- if (Subtarget.hasP9Vector()) {
- setOperationAction(ISD::ABS, MVT::v4i32, Legal);
- setOperationAction(ISD::ABS, MVT::v8i16, Legal);
- setOperationAction(ISD::ABS, MVT::v16i8, Legal);
- }
-
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
@@ -323,12 +317,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// to speed up scalar BSWAP64.
// CTPOP or CTTZ were introduced in P8/P9 respectively
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- if (Subtarget.isISA3_0()) {
+ if (Subtarget.hasP9Vector())
setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
+ else
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ if (Subtarget.isISA3_0()) {
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
- setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
@@ -554,6 +550,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Custom);
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
@@ -586,6 +583,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::SELECT_CC, VT, Promote);
AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
setOperationAction(ISD::STORE, VT, Promote);
@@ -626,7 +624,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
@@ -659,6 +656,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ // Without hasP8Altivec set, v2i64 SMAX isn't available.
+ // But ABS custom lowering requires SMAX support.
+ if (!Subtarget.hasP8Altivec())
+ setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
@@ -727,12 +729,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
- setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
-
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
@@ -792,12 +788,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
- // Vector operation legalization checks the result type of
- // SIGN_EXTEND_INREG, overall legalization checks the inner type.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+ // Custom handling for partial vectors of integers converted to
+ // floating point. We already have optimal handling for v2i32 through
+ // the DAG combine, so those aren't necessary.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
@@ -1055,6 +1056,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
@@ -1076,6 +1078,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::TRUNCATE);
+
if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SETCC);
@@ -1088,6 +1092,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::FSQRT);
}
+ if (Subtarget.hasP9Altivec()) {
+ setTargetDAGCombine(ISD::ABS);
+ setTargetDAGCombine(ISD::VSELECT);
+ }
+
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -1348,6 +1357,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::RFEBB: return "PPCISD::RFEBB";
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
+ case PPCISD::VABSD: return "PPCISD::VABSD";
case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
@@ -1355,6 +1365,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
+ case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
}
return nullptr;
}
@@ -2214,11 +2225,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
// disjoint.
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(N.getOperand(0), LHSKnown);
+ KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if (LHSKnown.Zero.getBoolValue()) {
- DAG.computeKnownBits(N.getOperand(1), RHSKnown);
+ KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
@@ -2317,8 +2327,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
- KnownBits LHSKnown;
- DAG.computeKnownBits(N.getOperand(0), LHSKnown);
+ KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
@@ -2405,6 +2414,28 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
return true;
}
+/// Returns true if we should use a direct load-to-vector instruction
+/// (such as lxsd or lfd) instead of a load into a GPR plus a direct-move
+/// sequence.
+static bool usePartialVectorLoads(SDNode *N) {
+ if (!N->hasOneUse())
+ return false;
+
+ // If there are any uses other than scalar_to_vector, keep it as a
+ // scalar load -> direct move pattern to prevent multiple loads.
+ // Currently we only check for i64, since we have lxsd/lfd to do this
+ // efficiently but no update-form equivalent.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ EVT MemVT = LD->getMemoryVT();
+ if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
+ SDNode *User = *(LD->use_begin());
+ if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -2430,6 +2461,13 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
} else
return false;
+ // Do not generate pre-inc forms for loads that feed scalar_to_vector,
+ // because we can fold these into a more efficient instruction instead
+ // (such as LXSD).
+ if (isLoad && usePartialVectorLoads(N)) {
+ return false;
+ }
+
// PowerPC doesn't have preinc load/store instructions for vectors (except
// for QPX, which does have preinc r+r forms).
if (VT.isVector()) {
@@ -2674,7 +2712,8 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual BlockAddress is stored in the TOC.
- if (Subtarget.isSVR4ABI() && isPositionIndependent()) {
+ if (Subtarget.isSVR4ABI() &&
+ (Subtarget.isPPC64() || isPositionIndependent())) {
if (Subtarget.isPPC64())
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
@@ -3480,9 +3519,14 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Argument stored in memory.
assert(VA.isMemLoc());
+ // Get the extended size of the argument type on the stack.
unsigned ArgSize = VA.getLocVT().getStoreSize();
- int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- isImmutable);
+ // Get the actual size of the argument type
+ unsigned ObjSize = VA.getValVT().getStoreSize();
+ unsigned ArgOffset = VA.getLocMemOffset();
+ // Stack objects in PPC32 are right justified.
+ ArgOffset += ArgSize - ObjSize;
+ int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -3935,7 +3979,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
"Invalid QPX parameter type");
- /* fall through */
+ LLVM_FALLTHROUGH;
case MVT::v4f64:
case MVT::v4i1:
@@ -5053,9 +5097,15 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
// All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
// into the call.
- if (isSVR4ABI && isPPC64 && !isPatchPoint) {
+ // We do need to reserve X2 to appease the verifier for the PATCHPOINT.
+ if (isSVR4ABI && isPPC64) {
setUsesTOCBasePtr(DAG);
- Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
+ // We cannot add X2 as an operand here for PATCHPOINT, because there is no
+ // way to mark dependencies as implicit here. We will add the X2 dependency
+ // in EmitInstrWithCustomInserter.
+ if (!isPatchPoint)
+ Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
}
return CallOpc;
@@ -5437,10 +5487,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
Arg = PtrOff;
}
- if (VA.isRegLoc()) {
- if (Arg.getValueType() == MVT::i1)
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+ // When useCRBits() is true, there can be i1 arguments.
+ // This is because getRegisterType(MVT::i1) => MVT::i1, while for the other
+ // integer types getRegisterType() => MVT::i32.
+ // Extend i1 here so that the callee always receives an i32.
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, MVT::i32, Arg);
+ if (VA.isRegLoc()) {
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -6073,7 +6128,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
"Invalid QPX parameter type");
- /* fall through */
+ LLVM_FALLTHROUGH;
case MVT::v4f64:
case MVT::v4i1: {
bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
@@ -7228,10 +7283,83 @@ SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
return FP;
}
+static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
+
+ EVT VecVT = Vec.getValueType();
+ assert(VecVT.isVector() && "Expected a vector type.");
+ assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
+
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned WideNumElts = 128 / EltVT.getSizeInBits();
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+
+ unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = Vec;
+ SDValue UndefVec = DAG.getUNDEF(VecVT);
+ for (unsigned i = 1; i < NumConcat; ++i)
+ Ops[i] = UndefVec;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
+}
+
+SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
+ const SDLoc &dl) const {
+
+ unsigned Opc = Op.getOpcode();
+ assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
+ "Unexpected conversion type");
+ assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
+ "Supports conversions to v2f64/v4f32 only.");
+
+ bool SignedConv = Opc == ISD::SINT_TO_FP;
+ bool FourEltRes = Op.getValueType() == MVT::v4f32;
+
+ SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
+ EVT WideVT = Wide.getValueType();
+ unsigned WideNumElts = WideVT.getVectorNumElements();
+ MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
+
+ SmallVector<int, 16> ShuffV;
+ for (unsigned i = 0; i < WideNumElts; ++i)
+ ShuffV.push_back(i + WideNumElts);
+
+ int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
+ int SaveElts = FourEltRes ? 4 : 2;
+ if (Subtarget.isLittleEndian())
+ for (int i = 0; i < SaveElts; i++)
+ ShuffV[i * Stride] = i;
+ else
+ for (int i = 1; i <= SaveElts; i++)
+ ShuffV[i * Stride - 1] = i - 1;
+
+ SDValue ShuffleSrc2 =
+ SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
+ SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
+ unsigned ExtendOp =
+ SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
+
+ SDValue Extend;
+ if (!Subtarget.hasP9Altivec() && SignedConv) {
+ Arrange = DAG.getBitcast(IntermediateVT, Arrange);
+ Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
+ DAG.getValueType(Op.getOperand(0).getValueType()));
+ } else
+ Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
+
+ return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
+}
+
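As a stand-alone illustration (not part of the patch) of how LowerINT_TO_FPVector arranges the elements, the sketch below reproduces the shuffle-mask computation for a little-endian v4i16 -> v4f32 conversion; buildIntToFPShuffleMask is a made-up helper name used only for this example.

#include <cstdio>
#include <vector>

// Hypothetical stand-alone model of the ShuffV computation in
// LowerINT_TO_FPVector: every index initially selects the second shuffle
// operand (undef or zero), then the live input elements are spread out with
// the computed stride.
static std::vector<int> buildIntToFPShuffleMask(unsigned WideNumElts,
                                                bool FourEltRes,
                                                bool IsLittleEndian) {
  std::vector<int> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts); // pick from the second operand
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (IsLittleEndian)
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i; // keep input element i
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;
  return ShuffV;
}

int main() {
  // v4i16 widened to v8i16 and converted to v4f32 on little-endian:
  // expected mask {0, 9, 1, 11, 2, 13, 3, 15}, i.e. each i16 is paired with
  // an element of the zero/undef operand to form the i32 lane that feeds the
  // conversion.
  for (int Idx : buildIntToFPShuffleMask(8, /*FourEltRes=*/true,
                                         /*IsLittleEndian=*/true))
    std::printf("%d ", Idx);
  std::printf("\n");
  return 0;
}
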
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ EVT InVT = Op.getOperand(0).getValueType();
+ EVT OutVT = Op.getValueType();
+ if (OutVT.isVector() && OutVT.isFloatingPoint() &&
+ isOperationCustom(Op.getOpcode(), InVT))
+ return LowerINT_TO_FPVector(Op, DAG, dl);
+
// Conversions to f128 are legal.
if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
return Op;
@@ -8902,35 +9030,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(PPC::R2, MVT::i32);
}
- // We are looking for absolute values here.
- // The idea is to try to fit one of two patterns:
- // max (a, (0-a)) OR max ((0-a), a)
- if (Subtarget.hasP9Vector() &&
- (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw ||
- IntrinsicID == Intrinsic::ppc_altivec_vmaxsh ||
- IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) {
- SDValue V1 = Op.getOperand(1);
- SDValue V2 = Op.getOperand(2);
- if (V1.getSimpleValueType() == V2.getSimpleValueType() &&
- (V1.getSimpleValueType() == MVT::v4i32 ||
- V1.getSimpleValueType() == MVT::v8i16 ||
- V1.getSimpleValueType() == MVT::v16i8)) {
- if ( V1.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
- V1.getOperand(1) == V2 ) {
- // Generate the abs instruction with the operands
- return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2);
- }
-
- if ( V2.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
- V2.getOperand(1) == V1 ) {
- // Generate the abs instruction with the operands
- return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1);
- }
- }
- }
-
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
int CompareOpc;
@@ -9081,30 +9180,6 @@ SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
-SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc dl(Op);
- // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
- // instructions), but for smaller types, we need to first extend up to v2i32
- // before doing going farther.
- if (Op.getValueType() == MVT::v2i64) {
- EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- if (ExtVT != MVT::v2i32) {
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
- Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
- DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
- ExtVT.getVectorElementType(), 4)));
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
- Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
- DAG.getValueType(MVT::v2i32));
- }
-
- return Op;
- }
-
- return SDValue();
-}
-
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9495,6 +9570,44 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
}
}
+SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+
+ assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
+
+ EVT VT = Op.getValueType();
+ assert(VT.isVector() &&
+ "Only set vector abs as custom, scalar abs shouldn't reach here!");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ VT == MVT::v16i8) &&
+ "Unexpected vector element type!");
+ assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
+ "Current subtarget doesn't support smax v2i64!");
+
+ // For vector abs, it can be lowered to:
+ // abs x
+ // ==>
+ // y = -x
+ // smax(x, y)
+
+ SDLoc dl(Op);
+ SDValue X = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
+
+ // The SMAX patch https://reviews.llvm.org/D47332 hasn't landed yet, so use
+ // the intrinsics here for now.
+ // TODO: Use ISD::SMAX directly once the SMAX patch lands.
+ Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
+ if (VT == MVT::v2i64)
+ BifID = Intrinsic::ppc_altivec_vmaxsd;
+ else if (VT == MVT::v8i16)
+ BifID = Intrinsic::ppc_altivec_vmaxsh;
+ else if (VT == MVT::v16i8)
+ BifID = Intrinsic::ppc_altivec_vmaxsb;
+
+ return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
+}
+
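A minimal scalar model (not part of the patch) of the lowering LowerABS performs on each vector lane, abs(x) => smax(x, 0 - x); absViaSMax is a made-up name, and the wrap-around behaviour mirrors ISD::ABS.

#include <cassert>
#include <cstdint>

// Scalar model of one vector lane: ISD::ABS lowered as smax(x, 0 - x).
// Like the vector instructions, the arithmetic wraps modulo 2^32, so
// INT32_MIN maps to itself in both formulations.
static int32_t absViaSMax(int32_t X) {
  int32_t Y = (int32_t)(0u - (uint32_t)X); // y = 0 - x with wrap-around
  return X > Y ? X : Y;                    // smax(x, y)
}

int main() {
  assert(absViaSMax(7) == 7);
  assert(absViaSMax(-7) == 7);
  assert(absViaSMax(0) == 0);
  assert(absViaSMax(INT32_MIN) == INT32_MIN); // matches ISD::ABS semantics
  return 0;
}
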
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9544,10 +9657,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
@@ -9624,6 +9737,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
+ case ISD::BITCAST:
+ // Don't handle bitcast here.
+ return;
}
}
@@ -9787,17 +9903,14 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
-MachineBasicBlock *
-PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
- MachineBasicBlock *BB,
- bool is8bit, // operation
- unsigned BinOpcode,
- unsigned CmpOpcode,
- unsigned CmpPred) const {
+MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
+ MachineInstr &MI, MachineBasicBlock *BB,
+ bool is8bit, // operation
+ unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
// If we support part-word atomic mnemonics, just use them
if (Subtarget.hasPartwordAtomics())
- return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
- CmpOpcode, CmpPred);
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
+ CmpPred);
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -9821,7 +9934,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB =
- CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
+ CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
if (CmpOpcode)
@@ -9832,22 +9945,25 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
- : &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC =
+ is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
unsigned ShiftReg =
- isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
- unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
- unsigned MaskReg = RegInfo.createVirtualRegister(RC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
unsigned Ptr1Reg;
- unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+ unsigned TmpReg =
+ (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
// thisMBB:
// ...
@@ -9876,82 +9992,107 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
- .addReg(ptrA).addReg(ptrB);
+ .addReg(ptrA)
+ .addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
- BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
- .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ // We need to use a 32-bit subregister here to avoid a register class
+ // mismatch in 64-bit mode.
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
+ .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
+ .addImm(3)
+ .addImm(27)
+ .addImm(is8bit ? 28 : 27);
if (!isLittleEndian)
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg)
+ .addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(61);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
- BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
- .addReg(incr).addReg(ShiftReg);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
- BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg)
+ .addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
- .addReg(Mask2Reg).addReg(ShiftReg);
+ .addReg(Mask2Reg)
+ .addReg(ShiftReg);
BB = loopMBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
- .addReg(Incr2Reg).addReg(TmpDestReg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
- .addReg(TmpDestReg).addReg(MaskReg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
- .addReg(TmpReg).addReg(MaskReg);
+ .addReg(Incr2Reg)
+ .addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
if (CmpOpcode) {
// For unsigned comparisons, we can directly compare the shifted values.
// For signed comparisons we shift and sign extend.
- unsigned SReg = RegInfo.createVirtualRegister(RC);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
- .addReg(TmpDestReg).addReg(MaskReg);
+ unsigned SReg = RegInfo.createVirtualRegister(GPRC);
+ BuildMI(BB, dl, TII->get(PPC::AND), SReg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
unsigned ValueReg = SReg;
unsigned CmpReg = Incr2Reg;
if (CmpOpcode == PPC::CMPW) {
- ValueReg = RegInfo.createVirtualRegister(RC);
+ ValueReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
- .addReg(SReg).addReg(ShiftReg);
- unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
+ .addReg(SReg)
+ .addReg(ShiftReg);
+ unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
- .addReg(ValueReg);
+ .addReg(ValueReg);
ValueReg = ValueSReg;
CmpReg = incr;
}
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
- .addReg(CmpReg).addReg(ValueReg);
+ .addReg(CmpReg)
+ .addReg(ValueReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
+ .addImm(CmpPred)
+ .addReg(PPC::CR0)
+ .addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
BB = loop2MBB;
}
- BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
- .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
BuildMI(BB, dl, TII->get(PPC::STWCX))
- .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
+ .addReg(Tmp4Reg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
- .addReg(ShiftReg);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
+ .addReg(TmpDestReg)
+ .addReg(ShiftReg);
return BB;
}
@@ -9968,10 +10109,6 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
-
unsigned DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
@@ -10034,10 +10171,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
- .addReg(PPC::X2)
- .addImm(TOCOffset)
- .addReg(BufReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addReg(PPC::X2)
+ .addImm(TOCOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
}
// Naked functions never have a base pointer, and so we use r1. For all
@@ -10052,8 +10189,8 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
- .addReg(BufReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
@@ -10086,8 +10223,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
.addImm(LabelOffset)
.addReg(BufReg);
}
-
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
mainMBB->addSuccessor(sinkMBB);
@@ -10111,10 +10247,6 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
-
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
@@ -10152,7 +10284,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(0)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload IP
if (PVT == MVT::i64) {
@@ -10164,7 +10296,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(LabelOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload SP
if (PVT == MVT::i64) {
@@ -10176,7 +10308,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(SPOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload BP
if (PVT == MVT::i64) {
@@ -10188,16 +10320,15 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(BPOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload TOC
if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
- .addImm(TOCOffset)
- .addReg(BufReg);
-
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addImm(TOCOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
}
// Jump
@@ -10221,7 +10352,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// way to mark the dependence as implicit there, and so the stackmap code
// will confuse it with a regular operand. Instead, add the dependence
// here.
- setUsesTOCBasePtr(*BB->getParent());
MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
}
@@ -10246,8 +10376,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *F = BB->getParent();
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
- MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
+ MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
+ MI.getOpcode() == PPC::SELECT_I8) {
SmallVector<MachineOperand, 2> Cond;
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8)
@@ -10392,9 +10522,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
- .addReg(HiReg).addReg(ReadAgainReg);
+ .addReg(HiReg)
+ .addReg(ReadAgainReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(CmpReg)
+ .addMBB(readMBB);
BB->addSuccessor(readMBB);
BB->addSuccessor(sinkMBB);
@@ -10564,27 +10697,35 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
- .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
- .addReg(oldval).addReg(dest);
+ .addReg(oldval)
+ .addReg(dest);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(midMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(midMBB);
BB = loop2MBB;
BuildMI(BB, dl, TII->get(StoreMnemonic))
- .addReg(newval).addReg(ptrA).addReg(ptrB);
+ .addReg(newval)
+ .addReg(ptrA)
+ .addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
BB = midMBB;
BuildMI(BB, dl, TII->get(StoreMnemonic))
- .addReg(dest).addReg(ptrA).addReg(ptrB);
+ .addReg(dest)
+ .addReg(ptrA)
+ .addReg(ptrB);
BB->addSuccessor(exitMBB);
// exitMBB:
@@ -10619,24 +10760,26 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
- : &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC =
+ is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
unsigned ShiftReg =
- isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
- unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
- unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
- unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
- unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
- unsigned MaskReg = RegInfo.createVirtualRegister(RC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
unsigned Ptr1Reg;
- unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpReg = RegInfo.createVirtualRegister(GPRC);
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
@@ -10673,74 +10816,107 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
- .addReg(ptrA).addReg(ptrB);
+ .addReg(ptrA)
+ .addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
- BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
- .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+
+ // We need to use a 32-bit subregister here to avoid a register class
+ // mismatch in 64-bit mode.
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
+ .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
+ .addImm(3)
+ .addImm(27)
+ .addImm(is8bit ? 28 : 27);
if (!isLittleEndian)
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg)
+ .addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(61);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(29);
BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
- .addReg(newval).addReg(ShiftReg);
+ .addReg(newval)
+ .addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
- .addReg(oldval).addReg(ShiftReg);
+ .addReg(oldval)
+ .addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
- .addReg(Mask3Reg).addImm(65535);
+ .addReg(Mask3Reg)
+ .addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
- .addReg(Mask2Reg).addReg(ShiftReg);
+ .addReg(Mask2Reg)
+ .addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
- .addReg(NewVal2Reg).addReg(MaskReg);
+ .addReg(NewVal2Reg)
+ .addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
- .addReg(OldVal2Reg).addReg(MaskReg);
+ .addReg(OldVal2Reg)
+ .addReg(MaskReg);
BB = loop1MBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
- BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
- .addReg(TmpDestReg).addReg(MaskReg);
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
- .addReg(TmpReg).addReg(OldVal3Reg);
+ .addReg(TmpReg)
+ .addReg(OldVal3Reg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(midMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(midMBB);
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
- .addReg(TmpDestReg).addReg(MaskReg);
- BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
- .addReg(Tmp2Reg).addReg(NewVal3Reg);
- BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
- .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
+ .addReg(Tmp2Reg)
+ .addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(Tmp4Reg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
BB = midMBB;
- BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(TmpDestReg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
- .addReg(ShiftReg);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
+ .addReg(TmpReg)
+ .addReg(ShiftReg);
} else if (MI.getOpcode() == PPC::FADDrtz) {
// This pseudo performs an FADD with rounding mode temporarily forced
// to round-to-zero. We emit this via custom inserter since the FPSCR
@@ -10777,9 +10953,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
- &PPC::GPRCRegClass :
- &PPC::G8RCRegClass);
+ unsigned Dest = RegInfo.createVirtualRegister(
+ Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
DebugLoc dl = MI.getDebugLoc();
BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
@@ -11231,9 +11406,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
} else {
// This is neither a signed nor an unsigned comparison, just make sure
// that the high bits are equal.
- KnownBits Op1Known, Op2Known;
- DAG.computeKnownBits(N->getOperand(0), Op1Known);
- DAG.computeKnownBits(N->getOperand(1), Op2Known);
+ KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
+ KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
// We don't really care about what is known about the first bit (if
// anything), so clear it in all masks prior to comparing them.
@@ -11750,6 +11924,37 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}
+SDValue PPCTargetLowering::combineSetCC(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert(N->getOpcode() == ISD::SETCC &&
+ "Should be called with a SETCC node");
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (CC == ISD::SETNE || CC == ISD::SETEQ) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
+ if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+ LHS.hasOneUse())
+ std::swap(LHS, RHS);
+
+ // x == 0-y --> x+y == 0
+ // x != 0-y --> x+y != 0
+ if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
+ RHS.hasOneUse()) {
+ SDLoc DL(N);
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ EVT OpVT = LHS.getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
+ return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
+ }
+ }
+
+ return DAGCombineTruncBoolExt(N, DCI);
+}
+
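The rewrite in combineSetCC relies on the modular identity x == (0 - y) <=> (x + y) == 0; a short self-contained C++ check (not part of the patch) of that identity:

#include <cassert>
#include <cstdint>

// For fixed-width integers, x == (0 - y) iff (x + y) == 0, because both
// sides are evaluated modulo 2^N.  This is the identity combineSetCC uses to
// turn a compare against a negation into a compare of a sum against zero.
static bool eqNeg(uint32_t X, uint32_t Y) { return X == 0u - Y; }
static bool sumIsZero(uint32_t X, uint32_t Y) { return X + Y == 0u; }

int main() {
  const uint32_t Samples[] = {0u, 1u, 5u, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t X : Samples)
    for (uint32_t Y : Samples)
      assert(eqNeg(X, Y) == sumIsZero(X, Y));
  return 0;
}
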
// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
@@ -11869,7 +12074,8 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
}
// Not a build vector of (possibly fp_rounded) loads.
- if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
+ if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
+ N->getNumOperands() == 1)
return SDValue();
for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
@@ -12450,6 +12656,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
+ case ISD::ADD:
+ return combineADD(N, DCI);
case ISD::SHL:
return combineSHL(N, DCI);
case ISD::SRA:
@@ -12476,7 +12684,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND:
return DAGCombineExtBoolTrunc(N, DCI);
case ISD::TRUNCATE:
+ return combineTRUNCATE(N, DCI);
case ISD::SETCC:
+ if (SDValue CSCC = combineSetCC(N, DCI))
+ return CSCC;
+ LLVM_FALLTHROUGH;
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
case ISD::SINT_TO_FP:
@@ -12499,9 +12711,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
(Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
(Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
- // STBRX can only handle simple types.
+ // STBRX can only handle simple types, and it makes no sense to store fewer
+ // than two bytes in byte-reversed order.
EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
- if (mVT.isExtended())
+ if (mVT.isExtended() || mVT.getSizeInBits() < 16)
break;
SDValue BSwapOp = N->getOperand(1).getOperand(0);
@@ -12877,6 +13090,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+
+ // Combine vmaxsw/h/b(a, a's negation) into abs(a) to expose the
+ // vabsduw/h/b opportunity downstream.
+ if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
+ (IID == Intrinsic::ppc_altivec_vmaxsw ||
+ IID == Intrinsic::ppc_altivec_vmaxsh ||
+ IID == Intrinsic::ppc_altivec_vmaxsb)) {
+ SDValue V1 = N->getOperand(1);
+ SDValue V2 = N->getOperand(2);
+ if ((V1.getSimpleValueType() == MVT::v4i32 ||
+ V1.getSimpleValueType() == MVT::v8i16 ||
+ V1.getSimpleValueType() == MVT::v16i8) &&
+ V1.getSimpleValueType() == V2.getSimpleValueType()) {
+ // (0-a, a)
+ if (V1.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
+ V1.getOperand(1) == V2) {
+ return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
+ }
+ // (a, 0-a)
+ if (V2.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
+ V2.getOperand(1) == V1) {
+ return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+ }
+ // (x-y, y-x)
+ if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
+ V1.getOperand(0) == V2.getOperand(1) &&
+ V1.getOperand(1) == V2.getOperand(0)) {
+ return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+ }
+ }
+ }
}
break;
@@ -13109,6 +13355,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::BUILD_VECTOR:
return DAGCombineBuildVector(N, DCI);
+ case ISD::ABS:
+ return combineABS(N, DCI);
+ case ISD::VSELECT:
+ return combineVSelect(N, DCI);
}
return SDValue();
@@ -13251,7 +13501,8 @@ PPCTargetLowering::getConstraintType(StringRef Constraint) const {
} else if (Constraint == "wc") { // individual CR bits.
return C_RegisterClass;
} else if (Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf" || Constraint == "ws") {
+ Constraint == "wf" || Constraint == "ws" ||
+ Constraint == "wi") {
return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
@@ -13281,6 +13532,8 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
return CW_Register;
else if (StringRef(constraint) == "ws" && type->isDoubleTy())
return CW_Register;
+ else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
+ return CW_Register; // just holds 64-bit integer data.
switch (*constraint) {
default:
@@ -13363,7 +13616,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// An individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass);
} else if ((Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf") && Subtarget.hasVSX()) {
+ Constraint == "wf" || Constraint == "wi") &&
+ Subtarget.hasVSX()) {
return std::make_pair(0U, &PPC::VSRCRegClass);
} else if (Constraint == "ws" && Subtarget.hasVSX()) {
if (VT == MVT::f32 && Subtarget.hasP8Vector())
@@ -13598,6 +13852,35 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
report_fatal_error("Invalid register name global variable");
}
+bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
+ // The 32-bit SVR4 ABI accesses everything as GOT-indirect.
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ return true;
+
+ CodeModel::Model CModel = getTargetMachine().getCodeModel();
+ // Under the small or large code model, module locals are accessed
+ // indirectly by loading their address from the .toc/.got. The difference
+ // is that the large code model uses ADDISTocHa + LDtocL, while the small
+ // code model simply uses LDtoc.
+ if (CModel == CodeModel::Small || CModel == CodeModel::Large)
+ return true;
+
+ // JumpTable and BlockAddress are accessed as got-indirect.
+ if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
+ return true;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+ const GlobalValue *GV = G->getGlobal();
+ unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
+ // The NLP flag indicates that a global access has to use an
+ // extra indirection.
+ if (GVFlags & PPCII::MO_NLP_FLAG)
+ return true;
+ }
+
+ return false;
+}
+
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The PowerPC target isn't yet aware of offsets.
@@ -14116,7 +14399,30 @@ SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
return Value;
- return SDValue();
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Subtarget.isISA3_0() ||
+ N0.getOpcode() != ISD::SIGN_EXTEND ||
+ N0.getOperand(0).getValueType() != MVT::i32 ||
+ CN1 == nullptr || N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ // We can't save an operation here if the value is already extended, and
+ // the existing shift is easier to combine.
+ SDValue ExtsSrc = N0.getOperand(0);
+ if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
+ ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
+ return SDValue();
+
+ SDLoc DL(N0);
+ SDValue ShiftBy = SDValue(CN1, 0);
+ // We want the shift amount to be an i32 on the extswsli, but the incoming
+ // shift amount could be an i64.
+ if (ShiftBy.getValueType() == MVT::i64)
+ ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
+
+ return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
+ ShiftBy);
}
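
For reference, a small stand-alone model (not part of the patch) of what the combine above assumes PPCISD::EXTSWSLI computes, based on its description as a sign-extend-word plus shift-left-immediate; the extswsli helper name is used only for illustration.

#include <cassert>
#include <cstdint>

// Assumed semantics of PPCISD::EXTSWSLI, per its description: sign-extend
// the low word, then shift the 64-bit result left by the immediate.  This is
// what (shl (sign_extend i32:x), c) computes, which is why the combine can
// fold the two nodes into one.
static uint64_t extswsli(uint32_t X, unsigned Sh) {
  int64_t Extended = (int64_t)(int32_t)X; // extsw: sign-extend word
  return (uint64_t)Extended << Sh;        // sli: shift left immediate
}

int main() {
  assert(extswsli(0x00000001u, 4) == 0x10u);
  assert(extswsli(0xFFFFFFFFu, 8) == 0xFFFFFFFFFFFFFF00ull); // -1 << 8
  return 0;
}
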
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
@@ -14133,6 +14439,152 @@ SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
return SDValue();
}
+// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
+// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
+// When C is zero, the expression (addi Z, -C) simplifies to Z.
+// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
+static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ if (!Subtarget.isPPC64())
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ auto isZextOfCompareWithConstant = [](SDValue Op) {
+ if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
+ Op.getValueType() != MVT::i64)
+ return false;
+
+ SDValue Cmp = Op.getOperand(0);
+ if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
+ Cmp.getOperand(0).getValueType() != MVT::i64)
+ return false;
+
+ if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
+ int64_t NegConstant = 0 - Constant->getSExtValue();
+ // Due to the limitations of the addi instruction,
+ // -C is required to be in [-32768, 32767].
+ return isInt<16>(NegConstant);
+ }
+
+ return false;
+ };
+
+ bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
+ bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
+
+ // If there is a pattern, canonicalize a zext operand to the RHS.
+ if (LHSHasPattern && !RHSHasPattern)
+ std::swap(LHS, RHS);
+ else if (!LHSHasPattern && !RHSHasPattern)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
+ SDValue Cmp = RHS.getOperand(0);
+ SDValue Z = Cmp.getOperand(0);
+ auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
+
+ assert(Constant && "Constant Should not be a null pointer.");
+ int64_t NegConstant = 0 - Constant->getSExtValue();
+
+ switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
+ default: break;
+ case ISD::SETNE: {
+ // when C == 0
+ // --> addze X, (addic Z, -1).carry
+ // /
+ // add X, (zext(setne Z, C))--
+ // \ when -32768 <= -C <= 32767 && C != 0
+ // --> addze X, (addic (addi Z, -C), -1).carry
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
+ DAG.getConstant(NegConstant, DL, MVT::i64));
+ SDValue AddOrZ = NegConstant != 0 ? Add : Z;
+ SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
+ AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
+ return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
+ SDValue(Addc.getNode(), 1));
+ }
+ case ISD::SETEQ: {
+ // when C == 0
+ // --> addze X, (subfic Z, 0).carry
+ // /
+ // add X, (zext(sete Z, C))--
+ // \ when -32768 <= -C <= 32767 && C != 0
+ // --> addze X, (subfic (addi Z, -C), 0).carry
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
+ DAG.getConstant(NegConstant, DL, MVT::i64));
+ SDValue AddOrZ = NegConstant != 0 ? Add : Z;
+ SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
+ DAG.getConstant(0, DL, MVT::i64), AddOrZ);
+ return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
+ SDValue(Subc.getNode(), 1));
+ }
+ }
+
+ return SDValue();
+}
+
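A plain C++ sketch (not part of the patch) of the carry trick described in the comment above combineADDToADDZE, with the addic/subfic carry (CA) bit modelled by unsigned arithmetic; carryOfAdd, addSetNE and addSetEQ are made-up names.

#include <cassert>
#include <cstdint>

// carryOfAdd models the CA bit of a 64-bit addition: it is set iff the
// unsigned sum wraps around.
static uint64_t carryOfAdd(uint64_t A, uint64_t B) { return (A + B) < A; }

// add X, (zext (setne Z, C)) --> addze X, (addic (addi Z, -C), -1).carry
static uint64_t addSetNE(uint64_t X, uint64_t Z, int64_t C) {
  uint64_t V = Z + (uint64_t)(-C);    // addi Z, -C
  uint64_t CA = carryOfAdd(V, ~0ULL); // addic V, -1 sets CA iff V != 0
  return X + CA;                      // addze X
}

// add X, (zext (seteq Z, C)) --> addze X, (subfic (addi Z, -C), 0).carry
static uint64_t addSetEQ(uint64_t X, uint64_t Z, int64_t C) {
  uint64_t V = Z + (uint64_t)(-C);    // addi Z, -C
  uint64_t CA = carryOfAdd(~V, 1);    // subfic V, 0 sets CA iff V == 0
  return X + CA;                      // addze X
}

int main() {
  for (int64_t C : {int64_t(0), int64_t(7), int64_t(-5)})
    for (uint64_t Z : {uint64_t(0), uint64_t(7), uint64_t(123)}) {
      assert(addSetNE(100, Z, C) == 100 + (Z != (uint64_t)C));
      assert(addSetEQ(100, Z, C) == 100 + (Z == (uint64_t)C));
    }
  return 0;
}
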
+SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
+ if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
+ return Value;
+
+ return SDValue();
+}
+
+// Detect TRUNCATE operations on bitcasts of float128 values.
+// What we are looking for here is the situation where we extract a subset
+// of bits from a 128-bit float.
+// This can be of two forms:
+// 1) BITCAST of f128 feeding TRUNCATE
+// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
+// This is required because we do not have a legal i128 type, so we want to
+// avoid having to store the f128 and then reload part of it.
+SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ // If we are using CRBits then try that first.
+ if (Subtarget.useCRBits()) {
+ // Check if CRBits did anything and return that if it did.
+ if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
+ return CRTruncValue;
+ }
+
+ SDLoc dl(N);
+ SDValue Op0 = N->getOperand(0);
+
+ // Looking for a truncate of i128 to i64.
+ if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
+
+ // SRL feeding TRUNCATE.
+ if (Op0.getOpcode() == ISD::SRL) {
+ ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ // The right shift has to be by 64 bits.
+ if (!ConstNode || ConstNode->getZExtValue() != 64)
+ return SDValue();
+
+ // Switch the element number to extract.
+ EltToExtract = EltToExtract ? 0 : 1;
+ // Update Op0 past the SRL.
+ Op0 = Op0.getOperand(0);
+ }
+
+ // BITCAST feeding a TRUNCATE possibly via SRL.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op0.getValueType() == MVT::i128 &&
+ Op0.getOperand(0).getValueType() == MVT::f128) {
+ SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
+ return DCI.DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
+ DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
+ }
+ return SDValue();
+}
+
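To make the element selection concrete, here is a small stand-alone check (not part of the patch) that assumes a little-endian host and the GCC/Clang unsigned __int128 extension: truncating a 128-bit value to i64 takes doubleword element 0, and a preceding shift right by 64 switches that to element 1.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Assumes the GCC/Clang __int128 extension and a little-endian host.
  unsigned __int128 V = ((unsigned __int128)0x1122334455667788ULL << 64) |
                        0x99AABBCCDDEEFF00ULL;

  // The v2i64 view of the same 128 bits.
  uint64_t Elts[2];
  std::memcpy(Elts, &V, sizeof(Elts));

  // truncate i128 -> i64 == element 0 (little-endian).
  assert((uint64_t)V == Elts[0]);
  // truncate (srl i128, 64) -> i64 == element 1 (little-endian).
  assert((uint64_t)(V >> 64) == Elts[1]);
  return 0;
}
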
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
@@ -14168,6 +14620,15 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}
+bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
+ if (!Subtarget.hasVSX())
+ return false;
+ if (Subtarget.hasP9Vector() && VT == MVT::f128)
+ return true;
+ return VT == MVT::f32 || VT == MVT::f64 ||
+ VT == MVT::v4f32 || VT == MVT::v2f64;
+}
+
bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
const Value *Mask = AndI.getOperand(1);
@@ -14184,3 +14645,109 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
// For non-constant masks, we can always use the record-form and.
return true;
}
+
+// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
+// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
+// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
+// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
+// Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
+SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
+ assert(Subtarget.hasP9Altivec() &&
+ "Only combine this when P9 altivec supported!");
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ if (N->getOperand(0).getOpcode() == ISD::SUB) {
+ // This is valid even for signed integers, because with zero-extended
+ // inputs both operands are known to be non-negative (as signed integers).
+ unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
+ unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
+ if ((SubOpcd0 == ISD::ZERO_EXTEND ||
+ SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ (SubOpcd1 == ISD::ZERO_EXTEND ||
+ SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0)->getOperand(0),
+ N->getOperand(0)->getOperand(1),
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+
+ // For type v4i32, it can be optimized with xvnegsp + vabsduw
+ if (N->getOperand(0).getValueType() == MVT::v4i32 &&
+ N->getOperand(0).hasOneUse()) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0)->getOperand(0),
+ N->getOperand(0)->getOperand(1),
+ DAG.getTargetConstant(1, dl, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
+
+// For types v4i32/v8i16/v16i8, transform
+// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
+// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
+SDValue PPCTargetLowering::combineVSelect(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
+ assert(Subtarget.hasP9Altivec() &&
+ "Only combine this when P9 altivec supported!");
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue TrueOpnd = N->getOperand(1);
+ SDValue FalseOpnd = N->getOperand(2);
+ EVT VT = N->getOperand(1).getValueType();
+
+ if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
+ FalseOpnd.getOpcode() != ISD::SUB)
+ return SDValue();
+
+ // VABSD is only available for types v4i32/v8i16/v16i8
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+
+ // Proceed only if at least one operand has a single use, so the combine
+ // saves at least one dependent computation.
+ if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
+ return SDValue();
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Can only handle unsigned comparison here
+ switch (CC) {
+ default:
+ return SDValue();
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ std::swap(TrueOpnd, FalseOpnd);
+ break;
+ }
+
+ SDValue CmpOpnd1 = Cond.getOperand(0);
+ SDValue CmpOpnd2 = Cond.getOperand(1);
+
+ // SETCC CmpOpnd1 CmpOpnd2 cond
+ // TrueOpnd = CmpOpnd1 - CmpOpnd2
+ // FalseOpnd = CmpOpnd2 - CmpOpnd1
+ if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
+ TrueOpnd.getOperand(1) == CmpOpnd2 &&
+ FalseOpnd.getOperand(0) == CmpOpnd2 &&
+ FalseOpnd.getOperand(1) == CmpOpnd1) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
+ CmpOpnd1, CmpOpnd2,
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+
+ return SDValue();
+}
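
The select-of-subtractions patterns handled by combineVSelect are all forms of the unsigned absolute difference; a scalar model (not part of the patch) of one lane, with made-up names selectOfSubs and absdu:

#include <cassert>
#include <cstdint>

// One lane of (vselect (setcc a, b, CC), t, f) for the four unsigned
// condition codes handled by combineVSelect, where the true/false operands
// are the two subtractions in the arrangement the combine matches.  All four
// compute the unsigned absolute difference that PPCISD::VABSD provides.
enum CC { UGT, UGE, ULT, ULE };

static uint32_t selectOfSubs(uint32_t A, uint32_t B, CC Cond) {
  bool C = false;
  switch (Cond) {
  case UGT: C = A > B;  break;
  case UGE: C = A >= B; break;
  case ULT: C = A < B;  break;
  case ULE: C = A <= B; break;
  }
  // For UGT/UGE the true operand is (a - b); for ULT/ULE the matched pattern
  // has the operands swapped, mirroring the std::swap in the combine.
  if (Cond == UGT || Cond == UGE)
    return C ? A - B : B - A;
  return C ? B - A : A - B;
}

static uint32_t absdu(uint32_t A, uint32_t B) { return A > B ? A - B : B - A; }

int main() {
  const uint32_t S[] = {0u, 1u, 7u, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t A : S)
    for (uint32_t B : S)
      for (CC Cond : {UGT, UGE, ULT, ULE})
        assert(selectOfSubs(A, B, Cond) == absdu(A, B));
  return 0;
}
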
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index f174943a8004..30acd60eba6f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -149,6 +149,10 @@ namespace llvm {
/// For vector types, only the last n bits are used. See vsld.
SRL, SRA, SHL,
+ /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
+ /// word and shift left immediate.
+ EXTSWSLI,
+
/// The combination of sra[wd]i and addze used to implemented signed
/// integer division by a power of 2. The first operand is the dividend,
/// and the second is the constant shift amount (representing the
@@ -369,6 +373,21 @@ namespace llvm {
/// An SDNode for swaps that are not associated with any loads/stores
/// and thereby have no chain.
SWAP_NO_CHAIN,
+
+ /// An SDNode for Power9 vector absolute value difference.
+ /// operand #0 vector
+ /// operand #1 vector
+ /// operand #2 constant i32 0 or 1, indicating whether the most significant
+ /// bit needs to be patched for signed i32 elements
+ ///
+ /// The Power9 VABSD* instructions are designed for unsigned integer
+ /// vectors (byte/halfword/word); to use them for signed integer vectors we
+ /// have to flip the sign bits first. Flipping the sign bit of byte/halfword
+ /// vectors would be inefficient, but for word vectors we can leverage
+ /// XVNEGSP to do it efficiently (see the sketch after this hunk), e.g.:
+ /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
+ /// => VABSDUW((XVNEGSP a), (XVNEGSP b))
+ VABSD,
/// QVFPERM = This corresponds to the QPX qvfperm instruction.
QVFPERM,
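The sign-bit-flip identity described for VABSD above can be checked with a short stand-alone program (not part of the patch); bias and absdu are made-up names, and the identity is only claimed when the subtraction does not overflow i32.

#include <cassert>
#include <cstdint>

// One word lane of the identity: flipping the sign bit (what XVNEGSP does to
// each word here) turns the signed problem into an unsigned one, so the
// unsigned absolute difference of the biased values equals abs(a - b)
// whenever a - b does not overflow i32.
static uint32_t bias(int32_t X) { return (uint32_t)X ^ 0x80000000u; }
static uint32_t absdu(uint32_t A, uint32_t B) { return A > B ? A - B : B - A; }

int main() {
  const int32_t S[] = {-1000, -1, 0, 1, 4242, 2147480000};
  for (int32_t A : S)
    for (int32_t B : S) {
      int64_t D = (int64_t)A - (int64_t)B;
      if (D < INT32_MIN || D > INT32_MAX)
        continue; // the identity is only claimed for non-overflowing subs
      uint32_t Expected = (uint32_t)(D < 0 ? -D : D);
      assert(absdu(bias(A), bias(B)) == Expected);
    }
  return 0;
}
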
@@ -557,6 +576,11 @@ namespace llvm {
/// DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ bool isSelectSupported(SelectSupportKind Kind) const override {
+ // PowerPC does not support scalar condition selects on vectors.
+ return (Kind != SelectSupportKind::ScalarCondVectorVal);
+ }
+
/// getPreferredVectorAction - The code we generate when vector types are
/// legalized by promoting the integer element type is often much worse
/// than code we generate if we widen the type for applicable vector types.
@@ -565,7 +589,7 @@ namespace llvm {
/// of v4i8's and shuffle them. This will turn into a mess of 8 extending
/// loads, moves back into VSR's (or memory ops if we don't have moves) and
/// then the VPERM for the shuffle. All in all a very slow sequence.
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
if (VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
@@ -785,6 +809,9 @@ namespace llvm {
return true;
}
+ // Returns true if the address of the global is stored in a TOC entry.
+ bool isAccessedAsGotIndirect(SDValue N) const;
+
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -923,6 +950,9 @@ namespace llvm {
SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
+ SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
+ const SDLoc &dl) const;
+
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -988,6 +1018,7 @@ namespace llvm {
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1088,6 +1119,11 @@ namespace llvm {
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
@@ -1122,6 +1158,7 @@ namespace llvm {
// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
}; // end class PPCTargetLowering
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index cdd57c6a1118..2ce6ad3293eb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -94,7 +94,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
let Defs = [LR8] in
- def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
+ def MovePCtoLR8 : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
@@ -199,47 +199,45 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
-let usesCustomInserter = 1 in {
- let Defs = [CR0] in {
- def ATOMIC_LOAD_ADD_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
- [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_SUB_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
- [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_OR_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
- [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_XOR_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
- [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_AND_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
- [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_NAND_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
- [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_MIN_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64",
- [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_MAX_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64",
- [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_UMIN_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64",
- [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_UMAX_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64",
- [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>;
-
- def ATOMIC_CMP_SWAP_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
- [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
-
- def ATOMIC_SWAP_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
- [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
- }
+let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_MIN_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64",
+ [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_MAX_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64",
+ [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64",
+ [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64",
+ [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
+
+ def ATOMIC_SWAP_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
}
// Instructions to support atomic operations
@@ -269,18 +267,18 @@ def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNdi8 :Pseudo< (outs),
+def TCRETURNdi8 :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd8 $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai8 :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+def TCRETURNai8 :PPCEmitTimePseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa8 $func $offset",
[(PPCtc_return (i64 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
+def TCRETURNri8 : PPCEmitTimePseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
@@ -347,14 +345,19 @@ def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
} // hasExtraSrcRegAllocReq = 1
} // hasSideEffects = 0
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
+// is not.
+let hasSideEffects = 1 in {
let Defs = [CTR8] in
- def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ def EH_SjLj_SetJmp64 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP64",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In64BitMode]>;
+}
+
+let hasSideEffects = 1, isBarrier = 1 in {
let isTerminator = 1 in
- def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ def EH_SjLj_LongJmp64 : PPCCustomInserterPseudo<(outs), (ins memr:$buf),
"#EH_SJLJ_LONGJMP64",
[(PPCeh_sjlj_longjmp addr:$buf)]>,
Requires<[In64BitMode]>;
@@ -396,10 +399,10 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
// the POWER3.
let Defs = [X1], Uses = [X1] in
-def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
+def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
[(set i64:$result,
(PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
-def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
+def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
[(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
let Defs = [LR8] in {
@@ -717,9 +720,10 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
-defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
"extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
- []>, isPPC64;
+ [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
+ isPPC64, Requires<[IsISA3_0]>;
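A hedged scalar model of what the extswsli pattern above computes, assuming the usual reading of the instruction (sign-extend the 32-bit source to a doubleword, then shift left by the immediate); the helper below is illustrative and not part of the patch:

  #include <cstdint>

  // Illustrative only: models extswsli rA, rS, SH for SH in [0, 63].
  int64_t extswsliModel(int32_t RS, unsigned SH) {
    uint64_t Extended = (uint64_t)(int64_t)RS; // sign-extend word to doubleword
    return (int64_t)(Extended << SH);          // then shift left immediate
  }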
// For fast-isel:
let isCodeGenOnly = 1, Defs = [CARRY] in
@@ -773,8 +777,12 @@ def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC)
"maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def SETB : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
- "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA),
+ "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
+ "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+}
def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins i32imm:$L),
"darn $RT, $L", IIC_LdStLD>, isPPC64;
def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D),
@@ -1018,19 +1026,19 @@ def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
-def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtoc: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtoc",
[(set i64:$rD,
(PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
-def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocJTI: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocJTI",
[(set i64:$rD,
(PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
-def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocCPT: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
@@ -1071,40 +1079,40 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
// Support for medium and large code model.
let hasSideEffects = 0 in {
let isReMaterializable = 1 in {
-def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+def ADDIStocHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDIStocHA", []>, isPPC64;
-def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItocL", []>, isPPC64;
}
let mayLoad = 1 in
-def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+def LDtocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL", []>, isPPC64;
}
// Support for thread-local storage.
-def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDISgotTprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISgotTprelHA",
[(set i64:$rD,
(PPCaddisGotTprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
+def LDgotTprelL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
"#LDgotTprelL",
[(set i64:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
-let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
-def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
+let Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE8 : PPCPostRAExpPseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
(ADD8TLS $in, tglobaltlsaddr:$g)>;
-def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIStlsgdHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsgdHA",
[(set i64:$rD,
(PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDItlsgdL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDItlsgdL",
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1115,7 +1123,7 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
// correct because the branch select pass is relying on it.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+def GETtlsADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsADDR",
[(set i64:$rD,
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
@@ -1125,7 +1133,7 @@ def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
in
-def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
+def ADDItlsgdLADDR : PPCEmitTimePseudo<(outs g8rc:$rD),
(ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
"#ADDItlsgdLADDR",
[(set i64:$rD,
@@ -1133,12 +1141,12 @@ def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIStlsldHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
(PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDItlsldL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDItlsldL",
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1147,7 +1155,7 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+def GETtlsldADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsldADDR",
[(set i64:$rD,
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
@@ -1157,7 +1165,7 @@ def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
in
-def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
+def ADDItlsldLADDR : PPCEmitTimePseudo<(outs g8rc:$rD),
(ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
"#ADDItlsldLADDR",
[(set i64:$rD,
@@ -1165,13 +1173,13 @@ def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDISdtprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
(PPCaddisDtprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIdtprelL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIdtprelL",
[(set i64:$rD,
(PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1221,30 +1229,30 @@ def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STBUX8: XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX8: XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
@@ -1252,13 +1260,13 @@ def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrix:$dst),
- "stdu $rS, $dst", IIC_LdStSTDU, []>,
+ "stdu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
isPPC64;
def STDUX : XForm_8_memOp<31, 181, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stdux $rS, $dst", IIC_LdStSTDUX, []>,
+ "stdux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 24969d7ef853..69b19e45c3e9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1051,6 +1051,20 @@ def : Pat<(v4f32 (ftrunc v4f32:$vA)),
def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
(VRFIN $vA)>;
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v4i32 (vselect v4i32:$vA, v4i32:$vB, v4i32:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v2i64 (vselect v2i64:$vA, v2i64:$vB, v2i64:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v4f32 (vselect v4i32:$vA, v4f32:$vB, v4f32:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v2f64 (vselect v2i64:$vA, v2f64:$vB, v2f64:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+
} // end HasAltivec
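The VSEL patterns above pass the vselect condition as the last operand because vsel is a per-bit select. A hedged one-lane model, illustrative and not part of the patch, with operands in the same order as (VSEL $vC, $vB, $vA):

  #include <cstdint>

  // Per-bit select: bits set in Mask are taken from TrueVal, cleared bits from
  // FalseVal. With all-ones/all-zeros lane masks this matches lane-wise vselect.
  uint32_t vselLane(uint32_t FalseVal, uint32_t TrueVal, uint32_t Mask) {
    return (Mask & TrueVal) | (~Mask & FalseVal);
  }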
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index f5f4b46344cf..2fe765dd99e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -2153,7 +2153,9 @@ class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
}
//===----------------------------------------------------------------------===//
-class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+// An EmitTimePseudo carries no encoding information for the [MC]CodeEmitter
+// machinery.
+class PPCEmitTimePseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
let isCodeGenOnly = 1;
let PPC64 = 0;
@@ -2162,6 +2164,21 @@ class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
let hasNoSchedulingInfo = 1;
}
+// Instructions that require custom insertion support (a.k.a. ISelPseudos).
+// Note that these do not have isPseudo set.
+class PPCCustomInserterPseudo<dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : PPCEmitTimePseudo<OOL, IOL, asmstr, pattern> {
+ let usesCustomInserter = 1;
+}
+
+// PostRAPseudos are expanded in expandPostRAPseudo; the isPseudo flag in the
+// .td files is set only for PostRAPseudos.
+class PPCPostRAExpPseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : PPCEmitTimePseudo<OOL, IOL, asmstr, pattern> {
+ let isPseudo = 1;
+}
+
class PseudoXFormMemOp<dag OOL, dag IOL, string asmstr, list<dag> pattern>
- : Pseudo<OOL, IOL, asmstr, pattern>, XFormMemOp;
+ : PPCPostRAExpPseudo<OOL, IOL, asmstr, pattern>, XFormMemOp;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 6c4e2129087c..0efe797c765d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -20,8 +20,8 @@ def HTM_get_imm : SDNodeXForm<imm, [{
return getI32Imm (N->getZExtValue(), SDLoc(N));
}]>;
-let hasSideEffects = 1, usesCustomInserter = 1 in {
-def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+let hasSideEffects = 1 in {
+def TCHECK_RET : PPCCustomInserterPseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 0930f7d3b8d7..d754ce2990d2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -987,7 +987,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::XXLOR;
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::XXLORf;
+ Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMR;
else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
@@ -1429,17 +1429,15 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
: (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI.setDesc(get(PPC::BCLR));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MI.setDesc(get(PPC::BCLRn));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
} else {
MI.setDesc(get(PPC::BCCLR));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg());
+ .add(Pred[1]);
}
return true;
@@ -1454,7 +1452,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BC));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
@@ -1462,7 +1460,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BCn));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
} else {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
@@ -1471,13 +1469,13 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BCC));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
}
return true;
- } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 ||
- OpC == PPC::BCTRL || OpC == PPC::BCTRL8) {
+ } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
+ OpC == PPC::BCTRL8) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
@@ -1487,14 +1485,12 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
: (setLR ? PPC::BCCTRL : PPC::BCCTR)));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
return true;
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
: (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
return true;
}
@@ -1502,7 +1498,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
: (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg());
+ .add(Pred[1]);
return true;
}
@@ -1822,7 +1818,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
int NewOpC = -1;
int MIOpC = MI->getOpcode();
- if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
+ if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8 ||
+ MIOpC == PPC::ANDISo || MIOpC == PPC::ANDISo8)
NewOpC = MIOpC;
else {
NewOpC = PPC::getRecordFormOpcode(MIOpC);
@@ -1912,14 +1909,36 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// compare).
// Rotates are expensive instructions. If we're emitting a record-form
- // rotate that can just be an andi, we should just emit the andi.
- if ((MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) &&
- MI->getOperand(2).getImm() == 0) {
+ // rotate that can just be an andi/andis, we should just emit that.
+ if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
+ unsigned GPRRes = MI->getOperand(0).getReg();
+ int64_t SH = MI->getOperand(2).getImm();
int64_t MB = MI->getOperand(3).getImm();
int64_t ME = MI->getOperand(4).getImm();
- if (MB < ME && MB >= 16) {
- uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
- NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIo : PPC::ANDIo8;
+ // We can only do this if both the start and end of the mask are in the
+ // same halfword.
+ bool MBInLoHWord = MB >= 16;
+ bool MEInLoHWord = ME >= 16;
+ uint64_t Mask = ~0LLU;
+
+ if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
+ Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
+ // The mask value needs to be shifted right by 16 if we're emitting andis.
+ Mask >>= MBInLoHWord ? 0 : 16;
+ NewOpC = MIOpC == PPC::RLWINM ?
+ (MBInLoHWord ? PPC::ANDIo : PPC::ANDISo) :
+ (MBInLoHWord ? PPC::ANDIo8 : PPC::ANDISo8);
+ } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
+ (ME - MB + 1 == SH) && (MB >= 16)) {
+ // If we are rotating by exactly the number of bits in the mask and the
+ // mask lies in the least significant bits of the register, that's just
+ // an andis. (as long as the GPR result has no uses).
+ Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
+ Mask >>= 16;
+ NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDISo : PPC::ANDISo8;
+ }
+ // If we've set the mask, we can transform.
+ if (Mask != ~0LLU) {
MI->RemoveOperand(4);
MI->RemoveOperand(3);
MI->getOperand(2).setImm(Mask);
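A hedged worked check of the mask arithmetic in the hunk above (standalone sketch, not part of the patch): a mask confined to the low halfword yields a record-form andi. immediate, while a mask confined to the high halfword is shifted right by 16 and yields an andis. immediate.

  #include <cassert>
  #include <cstdint>

  // Mask selected by RLWINM's MB..ME range (the SH == 0 case from the code above).
  static uint64_t rlwinmMask(int64_t MB, int64_t ME) {
    return ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
  }

  int main() {
    assert(rlwinmMask(24, 31) == 0xFFu);        // low halfword -> ANDIo 0xff
    assert((rlwinmMask(8, 15) >> 16) == 0xFFu); // high halfword -> ANDISo 0xff
    return 0;
  }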
@@ -2088,11 +2107,9 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
return true;
}
-#ifndef NDEBUG
static bool isAnImmediateOperand(const MachineOperand &MO) {
return MO.isCPI() || MO.isGlobal() || MO.isImm();
}
-#endif
bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto &MBB = *MI.getParent();
@@ -2231,6 +2248,35 @@ static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
return PPC::NoRegister;
}
+void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
+ unsigned OpNo,
+ int64_t Imm) const {
+ assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
+ // Replace the REG with the Immediate.
+ unsigned InUseReg = MI.getOperand(OpNo).getReg();
+ MI.getOperand(OpNo).ChangeToImmediate(Imm);
+
+ if (empty(MI.implicit_operands()))
+ return;
+
+ // We need to make sure that the MI does not retain any implicit use
+ // of this REG.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI);
+ if (UseOpIdx >= 0) {
+ MachineOperand &MO = MI.getOperand(UseOpIdx);
+ if (MO.isImplicit())
+ // The operands must always be in the following order:
+ // - explicit reg defs,
+ // - other explicit operands (reg uses, immediates, etc.),
+ // - implicit reg defs
+ // - implicit reg uses
+ // Therefore, removing the implicit operand won't change the layout of
+ // the explicit operands.
+ MI.RemoveOperand(UseOpIdx);
+ }
+}
+
// Replace an instruction with one that materializes a constant (and sets
// CR0 if the original instruction was a record-form instruction).
void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
@@ -2256,10 +2302,11 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
.addImm(LII.Imm);
}
-MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
- unsigned &ConstOp,
- bool &SeenIntermediateUse) const {
- ConstOp = ~0U;
+MachineInstr *PPCInstrInfo::getForwardingDefMI(
+ MachineInstr &MI,
+ unsigned &OpNoForForwarding,
+ bool &SeenIntermediateUse) const {
+ OpNoForForwarding = ~0U;
MachineInstr *DefMI = nullptr;
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2276,7 +2323,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
- ConstOp = i;
+ OpNoForForwarding = i;
break;
}
}
@@ -2297,7 +2344,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
- if (!instrHasImmForm(MI, III) && !ConvertibleImmForm)
+ if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm)
return nullptr;
// Don't convert or %X, %Y, %Y since that's just a register move.
@@ -2319,15 +2366,22 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
if (PPC::G8RCRegClass.contains(Reg))
Reg = Reg - PPC::X0 + PPC::R0;
- // Is this register defined by a load-immediate in this block?
+ // Is this register defined by some form of add-immediate (including
+ // load-immediate) within this basic block?
for ( ; It != E; ++It) {
if (It->modifiesRegister(Reg, &getRegisterInfo())) {
- if (It->getOpcode() == PPC::LI || It->getOpcode() == PPC::LI8) {
- ConstOp = i;
+ switch (It->getOpcode()) {
+ default: break;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::ADDItocL:
+ case PPC::ADDI:
+ case PPC::ADDI8:
+ OpNoForForwarding = i;
return &*It;
- } else
- break;
- } else if (It->readsRegister(Reg, &getRegisterInfo()))
+ }
+ break;
+ } else if (It->readsRegister(Reg, &getRegisterInfo()))
// If we see another use of this reg between the def and the MI,
// we want to flag it so the def isn't deleted.
SeenIntermediateUse = true;
@@ -2335,7 +2389,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
}
}
}
- return ConstOp == ~0U ? nullptr : DefMI;
+ return OpNoForForwarding == ~0U ? nullptr : DefMI;
}
const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {
@@ -2371,35 +2425,48 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
}
// If this instruction has an immediate form and one of its operands is a
-// result of a load-immediate, convert it to the immediate form if the constant
-// is in range.
+// result of a load-immediate or an add-immediate, convert it to
+// the immediate form if the constant is in range.
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef) const {
MachineFunction *MF = MI.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
bool PostRA = !MRI->isSSA();
bool SeenIntermediateUse = true;
- unsigned ConstantOperand = ~0U;
- MachineInstr *DefMI = getConstantDefMI(MI, ConstantOperand,
- SeenIntermediateUse);
- if (!DefMI || !DefMI->getOperand(1).isImm())
+ unsigned ForwardingOperand = ~0U;
+ MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
+ SeenIntermediateUse);
+ if (!DefMI)
+ return false;
+ assert(ForwardingOperand < MI.getNumOperands() &&
+ "The forwarding operand needs to be valid at this point");
+ bool KillFwdDefMI = !SeenIntermediateUse &&
+ MI.getOperand(ForwardingOperand).isKill();
+ if (KilledDef && KillFwdDefMI)
+ *KilledDef = DefMI;
+
+ ImmInstrInfo III;
+ bool HasImmForm = instrHasImmForm(MI, III, PostRA);
+ // If this is a reg+reg instruction that has a reg+imm form,
+ // and one of the operands is produced by an add-immediate,
+ // try to convert it.
+ if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand,
+ *DefMI, KillFwdDefMI))
+ return true;
+
+ if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) ||
+ !DefMI->getOperand(1).isImm())
return false;
- assert(ConstantOperand < MI.getNumOperands() &&
- "The constant operand needs to be valid at this point");
int64_t Immediate = DefMI->getOperand(1).getImm();
// Sign-extend to 64-bits.
int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
(Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
- if (KilledDef && MI.getOperand(ConstantOperand).isKill() &&
- !SeenIntermediateUse)
- *KilledDef = DefMI;
-
- // If this is a reg+reg instruction that has a reg+imm form, convert it now.
- ImmInstrInfo III;
- if (instrHasImmForm(MI, III))
- return transformToImmForm(MI, III, ConstantOperand, SExtImm);
+ // If this is a reg+reg instruction that has a reg+imm form,
+ // and one of the operands is produced by LI, convert it now.
+ if (HasImmForm)
+ return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm);
bool ReplaceWithLI = false;
bool Is64BitLI = false;
@@ -2443,7 +2510,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
- CompareUseMI.getOperand(1).ChangeToImmediate(0);
+ replaceInstrOperandWithImm(CompareUseMI, 1, 0);
CompareUseMI.RemoveOperand(3);
CompareUseMI.RemoveOperand(2);
continue;
@@ -2602,18 +2669,23 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
+static bool isVFReg(unsigned Reg) {
+ return PPC::VFRCRegClass.contains(Reg);
+}
+
bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
- ImmInstrInfo &III) const {
+ ImmInstrInfo &III, bool PostRA) const {
unsigned Opc = MI.getOpcode();
// The vast majority of the instructions would need their operand 2 replaced
// with an immediate when switching to the reg+imm form. A marked exception
// are the update form loads/stores for which a constant operand 2 would need
// to turn into a displacement and move operand 1 to the operand 2 position.
III.ImmOpNo = 2;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
III.ImmWidth = 16;
III.ImmMustBeMultipleOf = 1;
III.TruncateImmTo = 0;
+ III.IsSummingOperands = false;
switch (Opc) {
default: return false;
case PPC::ADD4:
@@ -2622,6 +2694,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 1;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
break;
case PPC::ADDC:
@@ -2630,6 +2703,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
break;
case PPC::ADDCo:
@@ -2637,6 +2711,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = PPC::ADDICo;
break;
case PPC::SUBFC:
@@ -2809,8 +2884,9 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpNo = 1;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
@@ -2866,8 +2942,9 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 2;
III.ZeroIsSpecialNew = 3;
III.IsCommutative = false;
+ III.IsSummingOperands = true;
III.ImmOpNo = 2;
- III.ConstantOpNo = 3;
+ III.OpNoForForwarding = 3;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
@@ -2898,21 +2975,30 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
}
break;
- // Power9 only.
+ // Power9 and up only. For some of these, the X-Form version has access to all
+ // 64 VSRs whereas the D-Form only has access to the VRs. We replace those
+ // with pseudo-ops pre-RA, and post-RA we check that the register loaded
+ // into or stored from is one of the VRs.
case PPC::LXVX:
case PPC::LXSSPX:
case PPC::LXSDX:
case PPC::STXVX:
case PPC::STXSSPX:
case PPC::STXSDX:
+ case PPC::XFLOADf32:
+ case PPC::XFLOADf64:
+ case PPC::XFSTOREf32:
+ case PPC::XFSTOREf64:
if (!Subtarget.hasP9Vector())
return false;
III.SignedImm = true;
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpNo = 1;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
+ III.ImmMustBeMultipleOf = 4;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LXVX:
@@ -2920,24 +3006,64 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ImmMustBeMultipleOf = 16;
break;
case PPC::LXSSPX:
- III.ImmOpcode = PPC::LXSSP;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::LXSSP;
+ else {
+ III.ImmOpcode = PPC::LFS;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFLOADf32:
+ III.ImmOpcode = PPC::DFLOADf32;
break;
case PPC::LXSDX:
- III.ImmOpcode = PPC::LXSD;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::LXSD;
+ else {
+ III.ImmOpcode = PPC::LFD;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFLOADf64:
+ III.ImmOpcode = PPC::DFLOADf64;
break;
case PPC::STXVX:
III.ImmOpcode = PPC::STXV;
III.ImmMustBeMultipleOf = 16;
break;
case PPC::STXSSPX:
- III.ImmOpcode = PPC::STXSSP;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::STXSSP;
+ else {
+ III.ImmOpcode = PPC::STFS;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFSTOREf32:
+ III.ImmOpcode = PPC::DFSTOREf32;
break;
case PPC::STXSDX:
- III.ImmOpcode = PPC::STXSD;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::STXSD;
+ else {
+ III.ImmOpcode = PPC::STFD;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFSTOREf64:
+ III.ImmOpcode = PPC::DFSTOREf64;
break;
}
break;
@@ -2984,13 +3110,264 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
}
}
-bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo,
- int64_t Imm) const {
+// Check whether 'MI', whose operand at index OpNoForForwarding is the one to
+// be forwarded, meets the requirements described in the ImmInstrInfo.
+bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned OpNoForForwarding
+ ) const {
+ // The check for PPC::ZERO/PPC::ZERO8 below does not work pre-RA, so we can
+ // only do this check post-RA.
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA())
+ return false;
+
+ // Cannot do the transform if MI isn't summing the operands.
+ if (!III.IsSummingOperands)
+ return false;
+
+ // The instruction we are trying to replace must have ZeroIsSpecialOrig set.
+ if (!III.ZeroIsSpecialOrig)
+ return false;
+
+ // We cannot do the transform if the operand we are trying to replace
+ // isn't the same as the operand the instruction allows.
+ if (OpNoForForwarding != III.OpNoForForwarding)
+ return false;
+
+ // Check if the instruction we are trying to transform really has
+ // the special zero register as its operand.
+ if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
+ MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
+ return false;
+
+ // This machine instruction is convertible if:
+ // 1. it is summing its operands,
+ // 2. one of the operands is the special zero register, and
+ // 3. the operand we are trying to replace is the one the MI allows.
+ return true;
+}
+
+// Check whether DefMI is an add instruction and, if so, set ImmMO and RegMO
+// accordingly.
+bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ MachineOperand *&ImmMO,
+ MachineOperand *&RegMO) const {
+ unsigned Opc = DefMI.getOpcode();
+ if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
+ return false;
+
+ assert(DefMI.getNumOperands() >= 3 &&
+ "Add inst must have at least three operands");
+ RegMO = &DefMI.getOperand(1);
+ ImmMO = &DefMI.getOperand(2);
+
+ // This DefMI is eligible for forwarding if it is:
+ // 1. an add instruction, and
+ // 2. one of its operands is an Imm/CPI/Global.
+ return isAnImmediateOperand(*ImmMO);
+}
+
+bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
+ const MachineInstr &DefMI,
+ const MachineInstr &MI,
+ bool KillDefMI
+ ) const {
+ // x = addi y, imm
+ // ...
+ // z = lfdx 0, x -> z = lfd imm(y)
+ // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
+ // of "y" between the DEF of "x" and "z".
+ // The query is only valid post RA.
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA())
+ return false;
+
+ // MachineInstr::readsRegister only returns true if the machine
+ // instruction reads the exact register or its super-register. It
+ // does not consider uses of sub-registers which seems like strange
+ // behaviour. Nonetheless, if we end up with a 64-bit register here,
+ // get the corresponding 32-bit register to check.
+ unsigned Reg = RegMO.getReg();
+ if (PPC::G8RCRegClass.contains(Reg))
+ Reg = Reg - PPC::X0 + PPC::R0;
+
+ // Walk the instructions in reverse (MI --> DefMI) to find the last DEF of Reg.
+ MachineBasicBlock::const_reverse_iterator It = MI;
+ MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
+ It++;
+ for (; It != E; ++It) {
+ if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+ return false;
+ // Made it to DefMI without encountering a clobber.
+ if ((&*It) == &DefMI)
+ break;
+ }
+ assert((&*It) == &DefMI && "DefMI is missing");
+
+ // If DefMI also uses the register to be forwarded, we can only forward it
+ // if DefMI is being erased.
+ if (DefMI.readsRegister(Reg, &getRegisterInfo()))
+ return KillDefMI;
+
+ return true;
+}
+
+bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
+ const MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ int64_t &Imm) const {
+ assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
+ if (DefMI.getOpcode() == PPC::ADDItocL) {
+ // The operand of ADDItocL is a CPI, which is not an immediate at compile
+ // time. However, we know it is 16 bits wide and has an alignment of 4.
+ // Check whether the instruction meets that requirement.
+ if (III.ImmMustBeMultipleOf > 4 ||
+ III.TruncateImmTo || III.ImmWidth != 16)
+ return false;
+
+ // Going from X-Form to D-Form loads means that the displacement needs to
+ // be not just an immediate but also a multiple of 4 or 16, depending on
+ // the load. A displacement that is only a multiple of, say, 2 cannot be
+ // represented as a D-Form load. X-Form loads do not have this restriction.
+ if (ImmMO.isGlobal() &&
+ ImmMO.getGlobal()->getAlignment() < III.ImmMustBeMultipleOf)
+ return false;
+
+ return true;
+ }
+
+ if (ImmMO.isImm()) {
+ // It is an immediate; check whether it fits the range.
+ int64_t Immediate = ImmMO.getImm();
+ // Sign-extend to 64-bits.
+ Imm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
+ (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
+
+ if (Imm % III.ImmMustBeMultipleOf)
+ return false;
+ if (III.TruncateImmTo)
+ Imm &= ((1 << III.TruncateImmTo) - 1);
+ if (III.SignedImm) {
+ APInt ActualValue(64, Imm, true);
+ if (!ActualValue.isSignedIntN(III.ImmWidth))
+ return false;
+ } else {
+ uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
+ if ((uint64_t)Imm > UnsignedMax)
+ return false;
+ }
+ }
+ else
+ return false;
+
+ // This ImmMO can be forwarded if it meets the requirements described
+ // in ImmInstrInfo.
+ return true;
+}
+
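The immediate handling in isImmElgibleForForwarding above sign-extends a 16-bit value before checking it against the range. A hedged standalone sketch of that step (the helper name is illustrative and not part of the patch):

  #include <cassert>
  #include <cstdint>

  // Sign-extend a value known to fit in 16 bits, exactly as the code above does.
  static int64_t signExtend16(int64_t Immediate) {
    return ((uint64_t)Immediate & ~0x7FFFuLL) != 0
               ? (int64_t)(Immediate | 0xFFFFFFFFFFFF0000)
               : Immediate;
  }

  int main() {
    assert(signExtend16(0x8000) == -32768); // bit 15 set -> negative displacement
    assert(signExtend16(0x1234) == 0x1234); // small positive values pass through
    return 0;
  }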
+// If an X-Form instruction is fed by an add-immediate and one of its operands
+// is the literal zero, attempt to forward the source of the add-immediate to
+// the corresponding D-Form instruction with the displacement coming from
+// the immediate being added.
+bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned OpNoForForwarding,
+ MachineInstr &DefMI,
+ bool KillDefMI) const {
+ // RegMO ImmMO
+ // | |
+ // x = addi reg, imm <----- DefMI
+ // y = op 0 , x <----- MI
+ // |
+ // OpNoForForwarding
+ // Check whether the MI meets the requirements described in the III.
+ if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
+ return false;
+
+ // Check whether the DefMI meets the requirements described in the III.
+ // If so, set the ImmMO and RegMO accordingly.
+ MachineOperand *ImmMO = nullptr;
+ MachineOperand *RegMO = nullptr;
+ if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
+ return false;
+ assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
+
+ // Now that we have the Imm operand, check whether the ImmMO meets the
+ // requirements described in the III. If so, set the Imm.
+ int64_t Imm = 0;
+ if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
+ return false;
+
+ // Check if the RegMO can be forwarded to MI.
+ if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI))
+ return false;
+
+ // We know that the MI and DefMI both match the pattern and that the Imm
+ // also meets the requirements of the new Imm-form. It is safe to do the
+ // transformation now.
+ LLVM_DEBUG(dbgs() << "Replacing instruction:\n");
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "Fed by:\n");
+ LLVM_DEBUG(DefMI.dump());
+
+ // Update the base reg first.
+ MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
+ false, false,
+ RegMO->isKill());
+
+ // Then, update the imm.
+ if (ImmMO->isImm()) {
+ // If the ImmMO is an immediate, change the operand holding ZERO to that
+ // immediate directly.
+ replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm);
+ }
+ else {
+ // Otherwise, it is a Constant Pool Index (CPI) or a Global, i.e. a
+ // relocation, and we need to replace the special zero register with ImmMO.
+ // Before that, we need to fix up the target flags for the immediate; for
+ // some reason the flag is not set on the ImmMO when it is a CPI.
+ if (DefMI.getOpcode() == PPC::ADDItocL)
+ ImmMO->setTargetFlags(PPCII::MO_TOC_LO);
+
+ // MI has no MI.setOperand(i) interface (only MI.getOperand(i)). To replace
+ // the ZERO MachineOperand with ImmMO, we remove the ZERO operand and all
+ // the operands behind it, add the ImmMO, and then add the remaining
+ // operands back.
+ SmallVector<MachineOperand, 2> MOps;
+ for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
+ MOps.push_back(MI.getOperand(i));
+ MI.RemoveOperand(i);
+ }
+
+ // Remove the last MO in the list, which is ZERO operand in fact.
+ MOps.pop_back();
+ // Add the imm operand.
+ MI.addOperand(*ImmMO);
+ // Now add the rest back.
+ for (auto &MO : MOps)
+ MI.addOperand(MO);
+ }
+
+ // Update the opcode.
+ MI.setDesc(get(III.ImmOpcode));
+
+ LLVM_DEBUG(dbgs() << "With:\n");
+ LLVM_DEBUG(MI.dump());
+
+ return true;
+}
+
+bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned ConstantOpNo,
+ int64_t Imm) const {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
// Exit early if we can't convert this.
- if ((ConstantOpNo != III.ConstantOpNo) && !III.IsCommutative)
+ if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
return false;
if (Imm % III.ImmMustBeMultipleOf)
return false;
@@ -3035,7 +3412,7 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo;
MI.setDesc(get(III.ImmOpcode));
- if (ConstantOpNo == III.ConstantOpNo) {
+ if (ConstantOpNo == III.OpNoForForwarding) {
// Converting shifts to immediate form is a bit tricky since they may do
// one of three things:
// 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
@@ -3063,42 +3440,47 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
uint64_t SH = RightShift ? 32 - ShAmt : ShAmt;
uint64_t MB = RightShift ? ShAmt : 0;
uint64_t ME = RightShift ? 31 : 31 - ShAmt;
- MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
+ replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
.addImm(ME);
} else {
// Left shifts use (N, 63-N), right shifts use (64-N, N).
uint64_t SH = RightShift ? 64 - ShAmt : ShAmt;
uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
- MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
+ replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
}
}
} else
- MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
+ replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
}
// Convert commutative instructions (switch the operands and convert the
// desired one to an immediate.
else if (III.IsCommutative) {
- MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
- swapMIOperands(MI, ConstantOpNo, III.ConstantOpNo);
+ replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
+ swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
} else
llvm_unreachable("Should have exited early!");
// For instructions for which the constant register replaces a different
// operand than where the immediate goes, we need to swap them.
- if (III.ConstantOpNo != III.ImmOpNo)
- swapMIOperands(MI, III.ConstantOpNo, III.ImmOpNo);
+ if (III.OpNoForForwarding != III.ImmOpNo)
+ swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo);
- // If the R0/X0 register is special for the original instruction and not for
- // the new instruction (or vice versa), we need to fix up the register class.
+ // If the special R0/X0 register index differs between the original
+ // instruction and the new instruction, we need to fix up the register
+ // class in the new instruction.
if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
- if (!III.ZeroIsSpecialOrig) {
+ if (III.ZeroIsSpecialNew) {
+ // If the operand at III.ZeroIsSpecialNew is a physical register (e.g.
+ // ZERO/ZERO8), there is no need to fix up the register class.
unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
- const TargetRegisterClass *NewRC =
- MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
- &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
- MRI.setRegClass(RegToModify, NewRC);
+ if (TargetRegisterInfo::isVirtualRegister(RegToModify)) {
+ const TargetRegisterClass *NewRC =
+ MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
+ &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
+ MRI.setRegClass(RegToModify, NewRC);
+ }
}
}
return true;
@@ -3140,6 +3522,7 @@ static bool isSignExtendingOp(const MachineInstr &MI) {
Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo ||
Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 ||
Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo ||
+ Opcode == PPC::SETB || Opcode == PPC::SETB8 ||
Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 ||
Opcode == PPC::EXTSB8_32_64)
return true;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index ba82f56a2464..7ed558b835af 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -91,8 +91,8 @@ struct ImmInstrInfo {
uint64_t ZeroIsSpecialNew : 3;
// Is the operation commutative?
uint64_t IsCommutative : 1;
- // The operand number to check for load immediate.
- uint64_t ConstantOpNo : 3;
+ // The operand number to check for add-immediate def.
+ uint64_t OpNoForForwarding : 3;
// The operand number for the immediate.
uint64_t ImmOpNo : 3;
// The opcode of the new instruction.
@@ -101,6 +101,8 @@ struct ImmInstrInfo {
uint64_t ImmWidth : 5;
// The immediate should be truncated to N bits.
uint64_t TruncateImmTo : 5;
+ // Is the instruction summing its operands?
+ uint64_t IsSummingOperands : 1;
};
// Information required to convert an instruction to just a materialized
@@ -123,10 +125,42 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
- bool transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo, int64_t Imm) const;
- MachineInstr *getConstantDefMI(MachineInstr &MI, unsigned &ConstOp,
- bool &SeenIntermediateUse) const;
+
+ // If the instruction has an imm-form and one of its operands is produced
+ // by an LI, put the immediate into the instruction directly and remove
+ // the LI if possible.
+ bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned ConstantOpNo, int64_t Imm) const;
+ // If the instruction has an imm-form and one of its operands is produced
+ // by an add-immediate, try to transform it when possible.
+ bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned ConstantOpNo,
+ MachineInstr &DefMI,
+ bool KillDefMI) const;
+ // Try to find whether the instruction 'MI' contains any operand that
+ // could be forwarded from an instruction that feeds it. If so, return the
+ // Def of that operand, and set OpNoForForwarding to the operand index in
+ // 'MI' for that Def. If another use of this Def is seen between the Def
+ // and the MI, SeenIntermediateUse is set to 'true'.
+ MachineInstr *getForwardingDefMI(MachineInstr &MI,
+ unsigned &OpNoForForwarding,
+ bool &SeenIntermediateUse) const;
+
+ // Can the user MI have its source at index \p OpNoForForwarding
+ // forwarded from an add-immediate that feeds it?
+ bool isUseMIElgibleForForwarding(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned OpNoForForwarding) const;
+ bool isDefMIElgibleForForwarding(MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ MachineOperand *&ImmMO,
+ MachineOperand *&RegMO) const;
+ bool isImmElgibleForForwarding(const MachineOperand &ImmMO,
+ const MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ int64_t &Imm) const;
+ bool isRegElgibleForForwarding(const MachineOperand &RegMO,
+ const MachineInstr &DefMI,
+ const MachineInstr &MI,
+ bool KillDefMI) const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
virtual void anchor();
@@ -158,6 +192,16 @@ public:
bool isXFormMemOp(unsigned Opcode) const {
return get(Opcode).TSFlags & PPCII::XFormMemOp;
}
+ static bool isSameClassPhysRegCopy(unsigned Opcode) {
+ unsigned CopyOpcodes[] =
+ { PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
+ PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb,
+ PPC::CROR, PPC::EVOR, -1U };
+ for (int i = 0; CopyOpcodes[i] != -1U; i++)
+ if (Opcode == CopyOpcodes[i])
+ return true;
+ return false;
+ }
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
@@ -369,8 +413,30 @@ public:
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
-
- bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const;
+ void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
+ int64_t Imm) const;
+
+ bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III,
+ bool PostRA) const;
+
+ /// getRegNumForOperand - some operands use different numbering schemes
+ /// for the same registers. For example, a VSX instruction may have any of
+ /// vs0-vs63 allocated whereas an Altivec instruction could only have
+ /// vs32-vs63 allocated (numbered as v0-v31). This function returns the actual
+ /// register number needed for the opcode/operand number combination.
+ /// The operand number argument will be useful when we need to extend this
+ /// to instructions that use both Altivec and VSX numbering (for different
+ /// operands).
+ static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
+ unsigned OpNo) {
+ if (Desc.TSFlags & PPCII::UseVSXReg) {
+ if (isVRRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::V0);
+ else if (isVFRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ }
+ return Reg;
+ }
};
}
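
getRegNumForOperand above is pure arithmetic on the overlap between the Altivec and VSX register files: v0-v31 are architecturally the same registers as vs32-vs63, so an operand of a UseVSXReg instruction is simply rebased. A hedged scalar illustration, with plain indices standing in for the PPC::* enum values:

#include <cassert>

// v0..v31 occupy the upper half of the 64-entry VSX file, i.e. vs32..vs63.
unsigned altivecToVsxIndex(unsigned VIndex) {
  assert(VIndex < 32 && "not an Altivec register index");
  return 32 + VIndex; // e.g. v5 -> vs37
}
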
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1a43037e4a4b..dd3f1ac79089 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -114,6 +114,10 @@ def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisPtrTy<1>
]>;
+def SDT_PPCextswsli : SDTypeProfile<1, 2, [ // extswsli
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2>
+]>;
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -218,6 +222,8 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
+
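
The new PPCextswsli node lets instruction selection fold a sign-extend-word feeding a shift-left-immediate into a single extswsli. Its scalar semantics, as a hedged C++ sketch:

#include <cstdint>

// extswsli RA, RS, SH: sign-extend the low 32 bits of RS to 64 bits, then
// shift left by SH (0..63). The shift is done on the unsigned value so the
// sketch stays free of signed-overflow UB.
uint64_t extswsli(uint64_t RS, unsigned SH) {
  uint64_t Ext = uint64_t(int64_t(int32_t(uint32_t(RS)))); // EXTS(RS[32:63])
  return Ext << SH;
}
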
// Move 2 i64 values into a VSX register
def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
SDTypeProfile<1, 2,
@@ -1189,77 +1195,76 @@ multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
-// Pseudo-instructions:
+// Pseudo instructions:
let hasCtrlDep = 1 in {
let Defs = [R1], Uses = [R1] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+def ADJCALLSTACKDOWN : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
"#ADJCALLSTACKDOWN $amt1 $amt2",
[(callseq_start timm:$amt1, timm:$amt2)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+def ADJCALLSTACKUP : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
"#ADJCALLSTACKUP $amt1 $amt2",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS),
+def UPDATE_VRSAVE : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$rS),
"UPDATE_VRSAVE $rD, $rS", []>;
}
let Defs = [R1], Uses = [R1] in
-def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
+def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
[(set i32:$result,
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
-def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
+def DYNAREAOFFSET : PPCEmitTimePseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
[(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
-let usesCustomInserter = 1, // Expanded after instruction selection.
- PPC970_Single = 1 in {
+let PPC970_Single = 1 in {
// Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
// because either operand might become the first operand in an isel, and
// that operand cannot be r0.
- def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond,
+ def SELECT_CC_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crrc:$cond,
gprc_nor0:$T, gprc_nor0:$F,
i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
- def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond,
+ def SELECT_CC_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crrc:$cond,
g8rc_nox0:$T, g8rc_nox0:$F,
i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
- def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ def SELECT_CC_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
i32imm:$BROPC), "#SELECT_CC_F4",
[]>;
- def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ def SELECT_CC_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
i32imm:$BROPC), "#SELECT_CC_F8",
[]>;
- def SELECT_CC_F16 : Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
+ def SELECT_CC_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_F16",
[]>;
- def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
+ def SELECT_CC_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_VRRC",
[]>;
// SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
// register bit directly.
- def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond,
+ def SELECT_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crbitrc:$cond,
gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4",
[(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>;
- def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
+ def SELECT_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8",
[(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>;
let Predicates = [HasFPU] in {
- def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
+ def SELECT_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
f4rc:$T, f4rc:$F), "#SELECT_F4",
[(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
- def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
+ def SELECT_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
f8rc:$T, f8rc:$F), "#SELECT_F8",
[(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_F16 : Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ def SELECT_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
vrrc:$T, vrrc:$F), "#SELECT_F16",
[(set f128:$dst, (select i1:$cond, f128:$T, f128:$F))]>;
}
- def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ def SELECT_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
vrrc:$T, vrrc:$F), "#SELECT_VRRC",
[(set v4i32:$dst,
(select i1:$cond, v4i32:$T, v4i32:$F))]>;
@@ -1268,18 +1273,18 @@ let Predicates = [HasFPU] in {
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
let mayStore = 1 in {
-def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
+def SPILL_CR : PPCEmitTimePseudo<(outs), (ins crrc:$cond, memri:$F),
"#SPILL_CR", []>;
-def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F),
+def SPILL_CRBIT : PPCEmitTimePseudo<(outs), (ins crbitrc:$cond, memri:$F),
"#SPILL_CRBIT", []>;
}
// RESTORE_CR - Indicate that we're restoring the CR register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in {
-def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
+def RESTORE_CR : PPCEmitTimePseudo<(outs crrc:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
-def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F),
+def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F),
"#RESTORE_CRBIT", []>;
}
@@ -1305,10 +1310,10 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
let Defs = [LR] in
- def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
+ def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
let Defs = [LR] in
- def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>,
+ def MoveGOTtoLR : PPCEmitTimePseudo<(outs), (ins), "#MoveGOTtoLR", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
@@ -1506,19 +1511,19 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNdi :Pseudo< (outs),
+def TCRETURNdi :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+def TCRETURNai :PPCEmitTimePseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa $func $offset",
[(PPCtc_return (i32 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
+def TCRETURNri : PPCEmitTimePseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
"#TC_RETURNr $dst $offset",
[]>;
@@ -1544,14 +1549,19 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
}
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
+// is not.
+let hasSideEffects = 1 in {
let Defs = [CTR] in
- def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ def EH_SjLj_SetJmp32 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP32",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In32BitMode]>;
+}
+
+let hasSideEffects = 1, isBarrier = 1 in {
let isTerminator = 1 in
- def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ def EH_SjLj_LongJmp32 : PPCCustomInserterPseudo<(outs), (ins memr:$buf),
"#EH_SJLJ_LONGJMP32",
[(PPCeh_sjlj_longjmp addr:$buf)]>,
Requires<[In32BitMode]>;
@@ -1561,7 +1571,7 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
// a terminator. Size is set to 0 to prevent the builtin assembler
// from emitting it.
let isBranch = 1, isTerminator = 1, Size = 0 in {
- def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ def EH_SjLj_Setup : PPCEmitTimePseudo<(outs), (ins directbrtarget:$dst),
"#EH_SjLj_Setup\t$dst", []>;
}
@@ -1648,119 +1658,117 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
-let usesCustomInserter = 1 in {
- let Defs = [CR0] in {
- def ATOMIC_LOAD_ADD_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
- [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
- [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
- [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
- [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
- [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
- [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
- [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
- [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
- [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
- [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_ADD_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
- [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
- [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
- [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
- [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
- [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
- [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
- [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
- [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
- [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
- [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_ADD_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
- [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
- [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
- [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
- [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
- [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
- [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
- [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
- [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
- [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
- [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
-
- def ATOMIC_CMP_SWAP_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
- [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
- def ATOMIC_CMP_SWAP_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
- [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
- def ATOMIC_CMP_SWAP_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
- [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
-
- def ATOMIC_SWAP_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
- [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
- def ATOMIC_SWAP_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
- [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
- def ATOMIC_SWAP_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
- [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
- }
+let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
+ [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
+ [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
+ [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
+ [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
+ [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
+ [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
+ [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
+ [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
+ [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
+ [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
+ [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
+ [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
+
+ def ATOMIC_SWAP_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
}
def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
@@ -1988,15 +1996,15 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
// Unindexed (r+i) Stores.
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
-def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
- "stb $rS, $src", IIC_LdStStore,
- [(truncstorei8 i32:$rS, iaddr:$src)]>;
-def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
- "sth $rS, $src", IIC_LdStStore,
- [(truncstorei16 i32:$rS, iaddr:$src)]>;
-def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
- "stw $rS, $src", IIC_LdStStore,
- [(store i32:$rS, iaddr:$src)]>;
+def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$dst),
+ "stb $rS, $dst", IIC_LdStStore,
+ [(truncstorei8 i32:$rS, iaddr:$dst)]>;
+def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$dst),
+ "sth $rS, $dst", IIC_LdStStore,
+ [(truncstorei16 i32:$rS, iaddr:$dst)]>;
+def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$dst),
+ "stw $rS, $dst", IIC_LdStStore,
+ [(store i32:$rS, iaddr:$dst)]>;
let Predicates = [HasFPU] in {
def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
"stfs $rS, $dst", IIC_LdStSTFD,
@@ -2010,13 +2018,13 @@ def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
// Unindexed (r+i) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
let Predicates = [HasFPU] in {
def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
@@ -2084,19 +2092,19 @@ def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBUX : XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX : XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX : XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
@@ -2543,8 +2551,8 @@ def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT),
// A pseudo-instruction used to implement the read of the 64-bit cycle counter
// on a 32-bit target.
-let hasSideEffects = 1, usesCustomInserter = 1 in
-def ReadTB : Pseudo<(outs gprc:$lo, gprc:$hi), (ins),
+let hasSideEffects = 1 in
+def ReadTB : PPCCustomInserterPseudo<(outs gprc:$lo, gprc:$hi), (ins),
"#ReadTB", []>;
let Uses = [CTR] in {
@@ -2603,13 +2611,13 @@ def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>;
// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
// so we'll need to scavenge a register for it.
let mayStore = 1 in
-def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+def SPILL_VRSAVE : PPCEmitTimePseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
"#SPILL_VRSAVE", []>;
// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in
-def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+def RESTORE_VRSAVE : PPCEmitTimePseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
"#RESTORE_VRSAVE", []>;
let hasSideEffects = 0 in {
@@ -2648,9 +2656,9 @@ def MCRXRX : X_BF3<31, 576, (outs crrc:$BF), (ins),
} // hasSideEffects = 0
let Predicates = [HasFPU] in {
-// Pseudo instruction to perform FADD in round-to-zero mode.
-let usesCustomInserter = 1, Uses = [RM] in {
- def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
+// Custom inserter instruction to perform FADD in round-to-zero mode.
+let Uses = [RM] in {
+ def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
[(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
}
@@ -3022,23 +3030,23 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS $in, tblockaddress:$g)>;
// Support for thread-local storage.
-def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
+def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
[(set i32:$rD, (PPCppc32GOT))]>;
// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
// This uses two output registers, the first as the real output, the second as a
// temporary register, used internally in code generation.
-def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
[]>, NoEncode<"$rT">;
-def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
+def LDgotTprelL32: PPCEmitTimePseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
"#LDgotTprelL32",
[(set i32:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
(ADD4TLS $in, tglobaltlsaddr:$g)>;
-def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDItlsgdL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsgdL32",
[(set i32:$rD,
(PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
@@ -3046,7 +3054,7 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+def GETtlsADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsADDR32",
[(set i32:$rD,
(PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
@@ -3054,14 +3062,14 @@ def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
+def ADDItlsgdLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD),
(ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
"#ADDItlsgdLADDR32",
[(set i32:$rD,
(PPCaddiTlsgdLAddr i32:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>;
-def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDItlsldL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsldL32",
[(set i32:$rD,
(PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
@@ -3069,7 +3077,7 @@ def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+def GETtlsldADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsldADDR32",
[(set i32:$rD,
(PPCgetTlsldAddr i32:$reg,
@@ -3078,31 +3086,31 @@ def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
+def ADDItlsldLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD),
(ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
"#ADDItlsldLADDR32",
[(set i32:$rD,
(PPCaddiTlsldLAddr i32:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>;
-def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDIdtprelL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDIdtprelL32",
[(set i32:$rD,
(PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
-def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDISdtprelHA32",
[(set i32:$rD,
(PPCaddisDtprelHA i32:$reg,
tglobaltlsaddr:$disp))]>;
// Support for Position-independent code
-def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
"#LWZtoc",
[(set i32:$rD,
(PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
// Get Global (GOT) Base Register offset, from the word immediately preceding
// the function label.
-def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
// Standard shifts. These are represented separately from the real shifts above
@@ -3930,21 +3938,19 @@ def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)),
def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
(SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-let usesCustomInserter = 1 in {
-def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+def ANDIo_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
"#ANDIo_1_EQ_BIT",
[(set i1:$dst, (trunc (not i32:$in)))]>;
-def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+def ANDIo_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
"#ANDIo_1_GT_BIT",
[(set i1:$dst, (trunc i32:$in))]>;
-def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+def ANDIo_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
"#ANDIo_1_EQ_BIT8",
[(set i1:$dst, (trunc (not i64:$in)))]>;
-def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+def ANDIo_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
"#ANDIo_1_GT_BIT8",
[(set i1:$dst, (trunc i64:$in))]>;
-}
def : Pat<(i1 (not (trunc i32:$in))),
(ANDIo_1_EQ_BIT $in)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index c4bb02695b36..ef589ad01fd7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -245,32 +245,30 @@ let Uses = [RM] in {
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
- let usesCustomInserter = 1 in {
- def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QFRC",
- []>;
- def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QSRC",
- []>;
- def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QBRC",
- []>;
-
- // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
- // register bit directly.
- def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
- qfrc:$T, qfrc:$F), "#SELECT_QFRC",
- [(set v4f64:$dst,
- (select i1:$cond, v4f64:$T, v4f64:$F))]>;
- def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
- qsrc:$T, qsrc:$F), "#SELECT_QSRC",
- [(set v4f32:$dst,
- (select i1:$cond, v4f32:$T, v4f32:$F))]>;
- def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
- qbrc:$T, qbrc:$F), "#SELECT_QBRC",
- [(set v4i1:$dst,
- (select i1:$cond, v4i1:$T, v4i1:$F))]>;
- }
+ def SELECT_CC_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QFRC",
+ []>;
+ def SELECT_CC_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QSRC",
+ []>;
+ def SELECT_CC_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QBRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
+ qfrc:$T, qfrc:$F), "#SELECT_QFRC",
+ [(set v4f64:$dst,
+ (select i1:$cond, v4f64:$T, v4f64:$F))]>;
+ def SELECT_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
+ qsrc:$T, qsrc:$F), "#SELECT_QSRC",
+ [(set v4f32:$dst,
+ (select i1:$cond, v4f32:$T, v4f32:$F))]>;
+ def SELECT_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
+ qbrc:$T, qbrc:$F), "#SELECT_QBRC",
+ [(set v4i1:$dst,
+ (select i1:$cond, v4i1:$T, v4i1:$F))]>;
// Convert and Round Instructions
def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 96649efdc1bc..9f5891a45f22 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -831,22 +831,20 @@ def : Pat<(f64 (fpextend f32:$src)),
}
let Predicates = [HasSPE] in {
- let usesCustomInserter = 1 in {
-def SELECT_CC_SPE4 : Pseudo<(outs spe4rc:$dst),
+def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst),
(ins crrc:$cond, spe4rc:$T, spe4rc:$F,
i32imm:$BROPC), "#SELECT_CC_SPE4",
[]>;
-def SELECT_CC_SPE : Pseudo<(outs sperc:$dst),
+def SELECT_CC_SPE : PPCCustomInserterPseudo<(outs sperc:$dst),
(ins crrc:$cond, sperc:$T, sperc:$F, i32imm:$BROPC),
"#SELECT_CC_SPE",
[]>;
-def SELECT_SPE4 : Pseudo<(outs spe4rc:$dst), (ins crbitrc:$cond,
+def SELECT_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst), (ins crbitrc:$cond,
spe4rc:$T, spe4rc:$F), "#SELECT_SPE4",
[(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
-def SELECT_SPE : Pseudo<(outs sperc:$dst), (ins crbitrc:$cond,
+def SELECT_SPE : PPCCustomInserterPseudo<(outs sperc:$dst), (ins crbitrc:$cond,
sperc:$T, sperc:$F), "#SELECT_SPE",
[(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
- }
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
(SELECT_SPE4 (CRANDC $lhs, $rhs), $tval, $fval)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 781a3277441a..0f073388dc74 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -67,6 +67,10 @@ def SDT_PPCxxswapd : SDTypeProfile<1, 1, [
def SDTVecConv : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>
]>;
+def SDTVabsd : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>
+]>;
+
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -79,6 +83,7 @@ def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
+def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -132,7 +137,7 @@ let Uses = [RM] in {
[]>;
// Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later
- let isPseudo = 1, CodeSize = 3 in
+ let CodeSize = 3 in
def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
"#XFLOADf64",
[(set f64:$XT, (load xoaddr:$src))]>;
@@ -163,7 +168,7 @@ let Uses = [RM] in {
[]>;
// Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later
- let isPseudo = 1, CodeSize = 3 in
+ let CodeSize = 3 in
def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
"#XFSTOREf64",
[(store f64:$XT, xoaddr:$dst)]>;
@@ -898,37 +903,36 @@ let Uses = [RM] in {
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
-let usesCustomInserter = 1, // Expanded after instruction selection.
- PPC970_Single = 1 in {
+let PPC970_Single = 1 in {
- def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst),
+ def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
(ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
"#SELECT_CC_VSRC",
[]>;
- def SELECT_VSRC: Pseudo<(outs vsrc:$dst),
+ def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
(ins crbitrc:$cond, vsrc:$T, vsrc:$F),
"#SELECT_VSRC",
[(set v2f64:$dst,
(select i1:$cond, v2f64:$T, v2f64:$F))]>;
- def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst),
+ def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
(ins crrc:$cond, f8rc:$T, f8rc:$F,
i32imm:$BROPC), "#SELECT_CC_VSFRC",
[]>;
- def SELECT_VSFRC: Pseudo<(outs f8rc:$dst),
+ def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
(ins crbitrc:$cond, f8rc:$T, f8rc:$F),
"#SELECT_VSFRC",
[(set f64:$dst,
(select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst),
+ def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
(ins crrc:$cond, f4rc:$T, f4rc:$F,
i32imm:$BROPC), "#SELECT_CC_VSSRC",
[]>;
- def SELECT_VSSRC: Pseudo<(outs f4rc:$dst),
+ def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
(ins crbitrc:$cond, f4rc:$T, f4rc:$F),
"#SELECT_VSSRC",
[(set f32:$dst,
(select i1:$cond, f32:$T, f32:$F))]>;
-} // usesCustomInserter
+}
} // AddedComplexity
def : InstAlias<"xvmovdp $XT, $XB",
@@ -1040,17 +1044,14 @@ def : Pat<(v2f64 (bitconvert v1i128:$A)),
def : Pat<(v1i128 (bitconvert v2f64:$A)),
(COPY_TO_REGCLASS $A, VRRC)>;
-// sign extension patterns
-// To extend "in place" from v2i32 to v2i64, we have input data like:
-// | undef | i32 | undef | i32 |
-// but xvcvsxwdp expects the input in big-Endian format:
-// | i32 | undef | i32 | undef |
-// so we need to shift everything to the left by one i32 (word) before
-// the conversion.
-def : Pat<(sext_inreg v2i64:$C, v2i32),
- (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>;
-def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
- (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+def : Pat<(v2i64 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
(v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
@@ -1069,10 +1070,6 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
// Stores.
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
}
let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
@@ -1159,6 +1156,26 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
(XVRSQRTEDP $A)>;
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+
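
The vselect patterns above all reduce to xxsel, which is a pure bitwise select, so the same lowering works for every element width; the COPY_TO_REGCLASS wrapping for v16i8/v8i16 only moves values between the Altivec and VSX register classes. Per 64-bit chunk the operation is (a scalar sketch):

#include <cstdint>

// xxsel XT, XA, XB, XC: each result bit comes from XB where the mask (XC)
// bit is 1 and from XA where it is 0. With the operand order used above,
// (XXSEL $vC, $vB, $vA) implements vselect mask=$vA ? $vB : $vC.
uint64_t xxselChunk(uint64_t FalseVal, uint64_t TrueVal, uint64_t Mask) {
  return (TrueVal & Mask) | (FalseVal & ~Mask);
}
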
let Predicates = [IsLittleEndian] in {
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
(f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1200,6 +1217,27 @@ def ScalarLoads {
dag Li32 = (i32 (load xoaddr:$src));
}
+def DWToSPExtractConv {
+ dag El0US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag El0SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
+ dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
+}
+
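
DWToSPExtractConv just names the eight extract-and-convert dags and the two build_vector forms so the endian-specific patterns further down can reuse them. What BVS and BVU describe, written out as a hedged scalar sketch:

#include <array>
#include <cstdint>

// BVS: convert both signed i64 lanes of two v2i64 inputs to f32 and collect
// the four results into a v4f32. BVU is the same with unsigned conversions.
std::array<float, 4> dwToSpSigned(const std::array<int64_t, 2> &S1,
                                  const std::array<int64_t, 2> &S2) {
  return {float(S1[0]), float(S1[1]), float(S2[0]), float(S2[1])};
}
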
// The following VSX instructions were introduced in Power ISA 2.07
/* FIXME: if the operands are v2i64, these patterns will not match.
we should define new patterns or otherwise match the same patterns
@@ -1241,23 +1279,19 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
"lxsiwzx $XT, $src", IIC_LdStLFD, []>;
- // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it
- // would cause these Pseudos are not expanded in expandPostRAPseudos()
- let isPseudo = 1 in {
- // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
- let CodeSize = 3 in
- def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
- "#XFLOADf32",
- [(set f32:$XT, (load xoaddr:$src))]>;
- // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
- def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
- "#LIWAX",
- [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
- // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
- def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
- "#LIWZX",
- [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
- }
+ // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
+ let CodeSize = 3 in
+ def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
+ "#XFLOADf32",
+ [(set f32:$XT, (load xoaddr:$src))]>;
+ // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
+ def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWAX",
+ [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+ // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
+ def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWZX",
+ [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
@@ -1268,19 +1302,15 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
"stxsiwx $XT, $dst", IIC_LdStSTFD, []>;
- // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it
- // would cause these Pseudos are not expanded in expandPostRAPseudos()
- let isPseudo = 1 in {
- // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
- let CodeSize = 3 in
- def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
- "#XFSTOREf32",
- [(store f32:$XT, xoaddr:$dst)]>;
- // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
- def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
- "#STIWX",
- [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
- }
+ // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
+ let CodeSize = 3 in
+ def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
+ "#XFSTOREf32",
+ [(store f32:$XT, xoaddr:$dst)]>;
+ // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
+ def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
+ "#STIWX",
+ [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
} // mayStore
} // UseVSXReg = 1
@@ -1443,35 +1473,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // UseVSXReg = 1
let Predicates = [IsLittleEndian] in {
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+ def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1SS1,
(f32 (XSCVSXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1US1,
(f32 (XSCVUXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
}
let Predicates = [IsBigEndian] in {
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
}
// Instructions for converting float to i64 feeding a store.
@@ -1993,6 +2015,10 @@ let Predicates = [IsLittleEndian, HasVSX] in
def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
(f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -2671,6 +2697,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
"xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
+ def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
+ (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
+
// Extract Exponent/Significand DP/QP
def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
@@ -2678,6 +2707,10 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
+ def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
+ (i64 (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (XSXEXPQP $vA)), sub_64)))>;
+
// Vector Insert Word
let UseVSXReg = 1 in {
// XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
@@ -3238,20 +3271,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(f64 (PPCVexts f64:$A, 2)),
(f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
- let isPseudo = 1 in {
- def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
- "#DFLOADf32",
- [(set f32:$XT, (load ixaddr:$src))]>;
- def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
- "#DFLOADf64",
- [(set f64:$XT, (load ixaddr:$src))]>;
- def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
- "#DFSTOREf32",
- [(store f32:$XT, ixaddr:$dst)]>;
- def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
- "#DFSTOREf64",
- [(store f64:$XT, ixaddr:$dst)]>;
- }
+ def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
+ "#DFLOADf32",
+ [(set f32:$XT, (load ixaddr:$src))]>;
+ def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
+ "#DFLOADf64",
+ [(set f64:$XT, (load ixaddr:$src))]>;
+ def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
+ "#DFSTOREf32",
+ [(store f32:$XT, ixaddr:$dst)]>;
+ def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
+ "#DFSTOREf64",
+ [(store f64:$XT, ixaddr:$dst)]>;
+
def : Pat<(f64 (extloadf32 ixaddr:$src)),
(COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
@@ -3533,22 +3565,20 @@ let AddedComplexity = 400 in {
}
let Predicates = [HasP9Vector] in {
- let isPseudo = 1 in {
- let mayStore = 1 in {
- def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
- (ins spilltovsrrc:$XT, memrr:$dst),
- "#SPILLTOVSR_STX", []>;
- def SPILLTOVSR_ST : Pseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
- "#SPILLTOVSR_ST", []>;
- }
- let mayLoad = 1 in {
- def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
- (ins memrr:$src),
- "#SPILLTOVSR_LDX", []>;
- def SPILLTOVSR_LD : Pseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
- "#SPILLTOVSR_LD", []>;
+ let mayStore = 1 in {
+ def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
+ (ins spilltovsrrc:$XT, memrr:$dst),
+ "#SPILLTOVSR_STX", []>;
+ def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
+ "#SPILLTOVSR_ST", []>;
+ }
+ let mayLoad = 1 in {
+ def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
+ (ins memrr:$src),
+ "#SPILLTOVSR_LDX", []>;
+ def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
+ "#SPILLTOVSR_LD", []>;
- }
}
}
// Integer extend helper dags 32 -> 64
@@ -3797,6 +3827,15 @@ let AddedComplexity = 400 in {
(XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
}
+ let Predicates = [IsBigEndian, HasP8Vector] in {
+ def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
+ (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
+ def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
+ (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+ }
+
// Big endian, available on all targets with VSX
let Predicates = [IsBigEndian, HasVSX] in {
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3825,6 +3864,15 @@ let AddedComplexity = 400 in {
(v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
}
+ let Predicates = [IsLittleEndian, HasP8Vector] in {
+ def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
+ (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
+ def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
+ (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+ }
+
let Predicates = [IsLittleEndian, HasVSX] in {
// Little endian, available on all targets with VSX
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3869,10 +3917,11 @@ let AddedComplexity = 400 in {
(COPY_TO_REGCLASS (MTVSRD $A), VSRC),
(COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0),
- (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>;
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
}
@@ -3884,10 +3933,11 @@ let AddedComplexity = 400 in {
(COPY_TO_REGCLASS (MTVSRD $B), VSRC),
(COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0),
- (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>;
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
}
@@ -3940,10 +3990,9 @@ let AddedComplexity = 400 in {
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
(v2i64 (MTVSRDD $rB, $rA))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW
- (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC)),
- (v4i32
- (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC)))>;
+ (MTVSRDD
+ (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
+ (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
}
let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
@@ -3953,10 +4002,9 @@ let AddedComplexity = 400 in {
def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
(v2i64 (MTVSRDD $rB, $rA))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW
- (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC)),
- (v4i32
- (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC)))>;
+ (MTVSRDD
+ (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
+ (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
}
// P9 Altivec instructions that can be used to build vectors.
// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
@@ -4005,3 +4053,21 @@ let AddedComplexity = 400 in {
}
}
+// Put this P9Altivec-related definition here since it may be selected to the
+// VSX instruction xvnegsp, avoiding a possible undef.
+let Predicates = [HasP9Altivec] in {
+
+ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
+ (v4i32 (VABSDUW $A, $B))>;
+
+ def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
+ (v8i16 (VABSDUH $A, $B))>;
+
+ def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
+ (v16i8 (VABSDUB $A, $B))>;
+
+  // As described for PPCvabsd, the last operand indicates whether to flip
+  // the sign bit.
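+  // xvnegsp flips the sign bit of each word (an XOR with 0x80000000), which
+  // maps signed order onto unsigned order, so vabsduw then computes the
+  // signed absolute difference.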
+ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
+ (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
index 230a04628504..d2a09f30c0f3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
@@ -1,4 +1,4 @@
-//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
+//===-- PPCPfmCounters.td - PPC Hardware Counters ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,10 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines intrinsics that are used by all hw codegen targets.
+// This describes the available hardware counters for PPC.
//
//===----------------------------------------------------------------------===//
-let TargetPrefix = "AMDGPU", isTarget = 1 in {
- def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
+def CpuCyclesPfmCounter : PfmCounter<"CYCLES">;
+
+def DefaultPfmCounters : ProcPfmCounters {
+ let CycleCounter = CpuCyclesPfmCounter;
}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
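+
+// llvm-exegesis reads this default binding to pick the cycle counter (the
+// libpfm event name "CYCLES"); no per-CPU counter sets are defined here yet.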
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 1892d1e3dc26..4458b92ceb5e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -33,6 +34,8 @@ STATISTIC(NumRRConvertedInPreEmit,
"Number of r+r instructions converted to r+i in pre-emit peephole");
STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
+STATISTIC(NumberOfSelfCopies,
+ "Number of self copy instructions eliminated");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -60,9 +63,32 @@ namespace {
return false;
bool Changed = false;
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
+ unsigned Opc = MI.getOpcode();
+        // Detect self-copies - these can result from running AADB.
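+        // E.g. "or 3, 3, 3" (a GPR copied onto itself) or "fmr 1, 1".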
+ if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.getNumOperands() == 3 &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+ MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
+ NumberOfSelfCopies++;
+ LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
+ else if (MCID.getNumOperands() == 2 &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ NumberOfSelfCopies++;
+ LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
+ }
MachineInstr *DefMIToErase = nullptr;
if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
Changed = true;
@@ -74,6 +100,75 @@ namespace {
}
}
}
+
+      // Eliminate a conditional branch whose CR bit is set to a constant by a
+      // CRSET or CRUNSET: either delete the branch or convert it into an
+      // unconditional branch. If the CR bit is not used by any other
+      // instruction, eliminate the CRSET/CRUNSET as well.
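+      //
+      // A rough illustration (register and block names are made up):
+      //   $cr5lt = CRSET
+      //   BC $cr5lt, %bb.target    ; always taken -> becomes B %bb.target
+      //   BCn $cr5lt, %bb.target   ; never taken  -> deleted, and so is the
+      //                            ; CRSET if $cr5lt has no other use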
+ auto I = MBB.getFirstInstrTerminator();
+ if (I == MBB.instr_end())
+ continue;
+ MachineInstr *Br = &*I;
+ if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
+ continue;
+ MachineInstr *CRSetMI = nullptr;
+ unsigned CRBit = Br->getOperand(0).getReg();
+ unsigned CRReg = getCRFromCRBit(CRBit);
+ bool SeenUse = false;
+ MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
+ for (It++; It != Er; It++) {
+ if (It->modifiesRegister(CRBit, TRI)) {
+ if ((It->getOpcode() == PPC::CRUNSET ||
+ It->getOpcode() == PPC::CRSET) &&
+ It->getOperand(0).getReg() == CRBit)
+ CRSetMI = &*It;
+ break;
+ }
+ if (It->readsRegister(CRBit, TRI))
+ SeenUse = true;
+ }
+ if (!CRSetMI) continue;
+
+ unsigned CRSetOp = CRSetMI->getOpcode();
+ if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
+ (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
+ // Remove this branch since it cannot be taken.
+ InstrsToErase.push_back(Br);
+ MBB.removeSuccessor(Br->getOperand(1).getMBB());
+ }
+ else {
+        // This conditional branch is always taken, so remove all branches
+        // and insert an unconditional branch to its destination.
+ MachineBasicBlock::iterator It = Br, Er = MBB.end();
+ for (; It != Er; It++) {
+ if (It->isDebugInstr()) continue;
+ assert(It->isTerminator() && "Non-terminator after a terminator");
+ InstrsToErase.push_back(&*It);
+ }
+ if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
+ ArrayRef<MachineOperand> NoCond;
+ TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
+ NoCond, Br->getDebugLoc());
+ }
+ for (auto &Succ : MBB.successors())
+ if (Succ != Br->getOperand(1).getMBB()) {
+ MBB.removeSuccessor(Succ);
+ break;
+ }
+ }
+
+      // If the CRBit is not used by another instruction, we can eliminate
+      // the CRSET/CRUNSET instruction.
+ if (!SeenUse) {
+ // We need to check use of the CRBit in successors.
+ for (auto &SuccMBB : MBB.successors())
+ if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
+ SeenUse = true;
+ break;
+ }
+ if (!SeenUse)
+ InstrsToErase.push_back(CRSetMI);
+ }
}
for (MachineInstr *MI : InstrsToErase) {
LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 96923a97a82c..3d067aa8e621 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -673,12 +673,15 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned SrcReg = MI.getOperand(0).getReg();
- BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL),
- getCRFromCRBit(SrcReg))
- .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
-
+ // We need to move the CR field that contains the CR bit we are spilling.
+ // The super register may not be explicitly defined (i.e. it can be defined
+ // by a CR-logical that only defines the subreg) so we state that the CR
+ // field is undef. Also, in order to preserve the kill flag on the CR bit,
+ // we add it as an implicit use.
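+  //
+  // E.g., when spilling $cr5lt this emits, roughly:
+  //   %tmp = MFOCRF8 undef $cr5, implicit $cr5lt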
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
- .addReg(getCRFromCRBit(SrcReg));
+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
+ .addReg(SrcReg,
+ RegState::Implicit | getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0LT, shift the bits left so that the bit to
// store is the first one. Mask all but that bit.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 91a98ee4efc7..e93fe4ce3453 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -85,8 +85,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
/// We require the register scavenger.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
@@ -141,6 +139,23 @@ public:
// Base pointer (stack realignment) support.
unsigned getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
+
+  /// stripRegisterPrefix - This method strips the character prefix from a
+  /// register name so that only the number is left. Used for Linux asm
+  /// printing.
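+  /// E.g. "r3" -> "3", "vs34" -> "34", "cr7" -> "7".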
+ static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'q': // for QPX
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 0e641cf9e00a..d0d29b6d2c7d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -85,6 +85,12 @@ class VSRL<FPR SubReg, string n> : PPCReg<n> {
let SubRegIndices = [sub_64];
}
+// VSXReg - One of the VSX registers in the range vs32-vs63 with numbering
+// and encoding to match.
+class VSXReg<bits<6> num, string n> : PPCReg<n> {
+ let HWEncoding{5-0} = num;
+}
+
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
@@ -148,7 +154,7 @@ foreach Index = 0-31 in {
// Dummy VSX registers, this defines string: "vs32"-"vs63", and is only used for
// asm printing.
foreach Index = 32-63 in {
- def VSX#Index : PPCReg<"vs"#Index>;
+ def VSX#Index : VSXReg<Index, "vs"#Index>;
}
// The representation of r0 when treated as the constant 0.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
index 5ad0a517c117..c8fe7d7eea78 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -42,7 +42,6 @@ def IIC_LdStLoad : InstrItinClass;
def IIC_LdStLoadUpd : InstrItinClass;
def IIC_LdStLoadUpdX : InstrItinClass;
def IIC_LdStStore : InstrItinClass;
-def IIC_LdStStoreUpd : InstrItinClass;
def IIC_LdStDSS : InstrItinClass;
def IIC_LdStICBI : InstrItinClass;
def IIC_LdStLD : InstrItinClass;
@@ -63,8 +62,8 @@ def IIC_LdStSLBIA : InstrItinClass;
def IIC_LdStSLBIE : InstrItinClass;
def IIC_LdStSTD : InstrItinClass;
def IIC_LdStSTDCX : InstrItinClass;
-def IIC_LdStSTDU : InstrItinClass;
-def IIC_LdStSTDUX : InstrItinClass;
+def IIC_LdStSTU : InstrItinClass;
+def IIC_LdStSTUX : InstrItinClass;
def IIC_LdStSTFD : InstrItinClass;
def IIC_LdStSTFDU : InstrItinClass;
def IIC_LdStSTVEBX : InstrItinClass;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
index 2455e5e52de5..646822eedbe0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -280,13 +280,6 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [P440_LWB]>],
[1, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
- InstrStage<1, [P440_LRACC]>,
- InstrStage<1, [P440_AGEN]>,
- InstrStage<1, [P440_CRD]>,
- InstrStage<2, [P440_LWB]>],
- [2, 1, 1, 1],
- [NoBypass, P440_GPR_Bypass]>,
InstrItinData<IIC_LdStICBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
@@ -373,14 +366,14 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [P440_LWB]>],
[4, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
InstrStage<1, [P440_CRD]>,
InstrStage<2, [P440_LWB]>],
[2, 1, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
InstrStage<1, [P440_CRD]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index 54cfae5d74b7..f34c1accc0fd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -81,8 +81,6 @@ def PPCA2Itineraries : ProcessorItineraries<
[6, 0, 0]>,
InstrItinData<IIC_LdStStore, [InstrStage<1, [A2_XU]>],
[0, 0, 0]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>],
- [2, 0, 0, 0]>,
InstrItinData<IIC_LdStICBI, [InstrStage<1, [A2_XU]>],
[16, 0, 0]>,
InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>],
@@ -105,9 +103,9 @@ def PPCA2Itineraries : ProcessorItineraries<
[82, 0, 0]>, // L2 latency
InstrItinData<IIC_LdStSTD, [InstrStage<1, [A2_XU]>],
[0, 0, 0]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [A2_XU]>],
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [A2_XU]>],
[2, 0, 0, 0]>,
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [A2_XU]>],
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [A2_XU]>],
[2, 0, 0, 0]>,
InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [A2_XU]>],
[82, 0, 0]>, // L2 latency
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
index d7c2bd15a258..479a970b2537 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
@@ -144,7 +144,13 @@ def PPCE500Itineraries : ProcessorItineraries<
InstrStage<1, [E500_LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, E500_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SU0, E500_SU1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
InstrStage<1, [E500_SU0, E500_SU1], 0>,
InstrStage<1, [E500_LSU_0]>],
[6, 1], // Latency = 3
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index 5f95f2a79f66..d8bda073833f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -157,7 +157,13 @@ def PPCE500mcItineraries : ProcessorItineraries<
InstrStage<1, [E500mc_LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, E500mc_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+ InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>,
+ InstrStage<1, [E500mc_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500mc_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>,
InstrStage<1, [E500mc_LSU_0]>],
[6, 1], // Latency = 3
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index 32f8e652dd56..3e50803955c4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -206,12 +206,6 @@ def PPCE5500Itineraries : ProcessorItineraries<
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
- InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
- InstrStage<1, [E5500_LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, E5500_GPR_Bypass],
- 2>, // 2 micro-ops
InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
@@ -281,13 +275,13 @@ def PPCE5500Itineraries : ProcessorItineraries<
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass],
2>, // 2 micro-ops
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
index 21efd8f8f6c9..0995b7200d93 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -43,7 +43,8 @@ def G3Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<2, [G3_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
index 340773ef7876..1b15c7b3c7ad 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -48,7 +48,8 @@ def G4Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 1d9f13fcb850..0044c3c6a449 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -56,7 +56,6 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>,
@@ -73,8 +72,8 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
index b5a9f96d45ae..c802b80170fb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -54,7 +54,6 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>,
@@ -76,8 +75,8 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStSLBIA , [InstrStage<40, [G5_SLU]>]>, // needs work
InstrItinData<IIC_LdStSLBIE , [InstrStage<2, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTD , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTDCX , [InstrStage<11, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTVEBX , [InstrStage<5, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTWCX , [InstrStage<11, [G5_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index a8678f56900e..1d6e509819da 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -114,6 +114,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[4, 1, 1]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
@@ -126,6 +130,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
@@ -253,13 +261,13 @@ def P7Itineraries : ProcessorItineraries<
InstrStage<1, [P7_LS1, P7_LS2], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<1, [P7_DU1], 0>,
InstrStage<1, [P7_DU2], 0>,
InstrStage<1, [P7_LS1, P7_LS2], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[2, 1, 1, 1]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P7_DU1], 0>,
InstrStage<1, [P7_DU2], 0>,
InstrStage<1, [P7_DU3], 0>,
InstrStage<1, [P7_DU4], 0>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 79963dd6a3e9..ff39dfda7016 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -90,6 +90,10 @@ def P8Itineraries : ProcessorItineraries<
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[4, 1, 1]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1, 1]>,
InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
@@ -102,6 +106,10 @@ def P8Itineraries : ProcessorItineraries<
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
@@ -259,14 +267,14 @@ def P8Itineraries : ProcessorItineraries<
InstrStage<1, [P8_LU1, P8_LU2,
P8_LSU1, P8_LSU2]>]
[1, 1, 1]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<1, [P8_DU1], 0>,
InstrStage<1, [P8_DU2], 0>,
InstrStage<1, [P8_LU1, P8_LU2,
P8_LSU1, P8_LSU2], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[2, 1, 1, 1]>,
// First+last
- InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P8_DU1], 0>,
InstrStage<1, [P8_DU2], 0>,
InstrStage<1, [P8_DU3], 0>,
InstrStage<1, [P8_DU4], 0>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index e1a480117315..a1e625c855e0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -33,6 +33,12 @@ def P9Model : SchedMachineModel {
// A dispatch group is 6 instructions.
let LoopMicroOpBufferSize = 60;
+ // As iops are dispatched to a slice, they are held in an independent slice
+ // issue queue until all register sources and other dependencies have been
+  // resolved and they can be issued. Each of the four execution slices has an
+  // 11-entry iop issue queue.
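+  // (Four slices x 11 entries = 44 buffered micro-ops in total.)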
+ let MicroOpBufferSize = 44;
+
let CompleteModel = 1;
// Do not support QPX (Quad Processing eXtension) or SPE (Signal Processing
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index a8d7955ef548..580d057602f5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -181,6 +181,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
const TargetOptions &Options) {
+ if (TT.isOSDarwin())
+ report_fatal_error("Darwin is no longer supported for PowerPC");
+
if (Options.MCOptions.getABIName().startswith("elfv1"))
return PPCTargetMachine::PPC_ABI_ELFv1;
else if (Options.MCOptions.getABIName().startswith("elfv2"))
@@ -211,19 +214,24 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
if (TT.isOSDarwin())
return Reloc::DynamicNoPIC;
- // Non-darwin 64-bit platforms are PIC by default.
- if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)
+ // Big Endian PPC is PIC by default.
+ if (TT.getArch() == Triple::ppc64)
return Reloc::PIC_;
- // 32-bit is static by default.
+ // Rest are static by default.
return Reloc::Static;
}
-static CodeModel::Model getEffectiveCodeModel(const Triple &TT,
- Optional<CodeModel::Model> CM,
- bool JIT) {
- if (CM)
+static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
+ Optional<CodeModel::Model> CM,
+ bool JIT) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
return *CM;
+ }
if (!TT.isOSDarwin() && !JIT &&
(TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
return CodeModel::Medium;
@@ -243,7 +251,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(TT, CM, JIT), OL),
+ getEffectivePPCCodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())),
TargetABI(computeTargetABI(TT, Options)) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index b0da9b5a6d70..bc9bcab83a0a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -473,7 +473,14 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
+
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 2ee2b3eb8084..9221a910288a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -90,7 +90,9 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
/// @}
};
diff --git a/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 9a455c105482..1d1112cc5124 100644
--- a/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -7,12 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "MCTargetDesc/RISCVAsmBackend.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVTargetStreamer.h"
+#include "Utils/RISCVBaseInfo.h"
+#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -39,6 +43,8 @@ namespace {
struct RISCVOperand;
class RISCVAsmParser : public MCTargetAsmParser {
+ SmallVector<FeatureBitset, 4> FeatureBitStack;
+
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); }
@@ -73,6 +79,9 @@ class RISCVAsmParser : public MCTargetAsmParser {
// synthesize the desired immediate value into the destination register.
void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out);
+ // Helper to emit pseudo instruction "lla" used in PC-rel addressing.
+ void emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
/// Helper for processing MC instructions that have been successfully matched
/// by MatchAndEmitInstruction. Modifications to the emitted instructions,
/// like the expansion of pseudo instructions (e.g., "li"), can be performed
@@ -83,13 +92,16 @@ class RISCVAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "RISCVGenAsmMatcher.inc"
+ OperandMatchResultTy parseCSRSystemRegister(OperandVector &Operands);
OperandMatchResultTy parseImmediate(OperandVector &Operands);
OperandMatchResultTy parseRegister(OperandVector &Operands,
bool AllowParens = false);
OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
+ OperandMatchResultTy parseBareSymbol(OperandVector &Operands);
+ OperandMatchResultTy parseJALOffset(OperandVector &Operands);
- bool parseOperand(OperandVector &Operands, bool ForceImmediate);
+ bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
bool parseDirectiveOption();
@@ -108,6 +120,21 @@ class RISCVAsmParser : public MCTargetAsmParser {
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
}
+
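+  // These presumably back the `.option push` / `.option pop` assembler
+  // directives, e.g.:
+  //   .option push
+  //   .option norvc
+  //   ...
+  //   .option pop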
+ void pushFeatureBits() {
+ FeatureBitStack.push_back(getSTI().getFeatureBits());
+ }
+
+ bool popFeatureBits() {
+ if (FeatureBitStack.empty())
+ return true;
+
+ FeatureBitset FeatureBits = FeatureBitStack.pop_back_val();
+ copySTI().setFeatureBits(FeatureBits);
+ setAvailableFeatures(ComputeAvailableFeatures(FeatureBits));
+
+ return false;
+ }
public:
enum RISCVMatchResultTy {
Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
@@ -139,6 +166,7 @@ struct RISCVOperand : public MCParsedAsmOperand {
Token,
Register,
Immediate,
+ SystemRegister
} Kind;
bool IsRV64;
@@ -151,11 +179,20 @@ struct RISCVOperand : public MCParsedAsmOperand {
const MCExpr *Val;
};
+ struct SysRegOp {
+ const char *Data;
+ unsigned Length;
+ unsigned Encoding;
+ // FIXME: Add the Encoding parsed fields as needed for checks,
+ // e.g.: read/write or user/supervisor/machine privileges.
+ };
+
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
RegOp Reg;
ImmOp Imm;
+ struct SysRegOp SysReg;
};
RISCVOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -176,6 +213,9 @@ public:
case Token:
Tok = o.Tok;
break;
+ case SystemRegister:
+ SysReg = o.SysReg;
+ break;
}
}
@@ -183,19 +223,22 @@ public:
bool isReg() const override { return Kind == Register; }
bool isImm() const override { return Kind == Immediate; }
bool isMem() const override { return false; }
+ bool isSystemRegister() const { return Kind == SystemRegister; }
- bool evaluateConstantImm(int64_t &Imm, RISCVMCExpr::VariantKind &VK) const {
- const MCExpr *Val = getImm();
- bool Ret = false;
- if (auto *RE = dyn_cast<RISCVMCExpr>(Val)) {
- Ret = RE->evaluateAsConstant(Imm);
+ static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
+ RISCVMCExpr::VariantKind &VK) {
+ if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
VK = RE->getKind();
- } else if (auto CE = dyn_cast<MCConstantExpr>(Val)) {
- Ret = true;
+ return RE->evaluateAsConstant(Imm);
+ }
+
+ if (auto CE = dyn_cast<MCConstantExpr>(Expr)) {
VK = RISCVMCExpr::VK_RISCV_None;
Imm = CE->getValue();
+ return true;
}
- return Ret;
+
+ return false;
}
// True if operand is a symbol with no modifiers, or a constant with no
@@ -205,7 +248,7 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
bool IsValid;
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
@@ -220,12 +263,14 @@ public:
int64_t Imm;
RISCVMCExpr::VariantKind VK;
// Must be of 'immediate' type but not a constant.
- if (!isImm() || evaluateConstantImm(Imm, VK))
+ if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isCSRSystemRegister() const { return isSystemRegister(); }
+
/// Return true if the operand is a valid for the fence instruction e.g.
/// ('iorw').
bool isFenceArg() const {
@@ -265,12 +310,14 @@ public:
return RISCVFPRndMode::stringToRoundingMode(Str) != RISCVFPRndMode::Invalid;
}
- bool isImmXLen() const {
+ bool isImmXLenLI() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ if (VK == RISCVMCExpr::VK_RISCV_LO || VK == RISCVMCExpr::VK_RISCV_PCREL_LO)
+ return true;
// Given only Imm, ensuring that the actually specified constant is either
// a signed or unsigned 64-bit number is unfortunately impossible.
bool IsInRange = isRV64() ? true : isInt<32>(Imm) || isUInt<32>(Imm);
@@ -282,7 +329,8 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- if (!evaluateConstantImm(Imm, VK) || VK != RISCVMCExpr::VK_RISCV_None)
+ if (!evaluateConstantImm(getImm(), Imm, VK) ||
+ VK != RISCVMCExpr::VK_RISCV_None)
return false;
return (isRV64() && isUInt<6>(Imm)) || isUInt<5>(Imm);
}
@@ -292,7 +340,8 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- if (!evaluateConstantImm(Imm, VK) || VK != RISCVMCExpr::VK_RISCV_None)
+ if (!evaluateConstantImm(getImm(), Imm, VK) ||
+ VK != RISCVMCExpr::VK_RISCV_None)
return false;
if (Imm == 0)
return false;
@@ -304,7 +353,7 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -313,66 +362,68 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUInt<5>(Imm) && (Imm != 0) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6() const {
+ if (!isImm())
+ return false;
RISCVMCExpr::VariantKind VK;
int64_t Imm;
- bool IsValid;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- if (!IsConstantImm)
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- else
- IsValid = isInt<6>(Imm);
- return IsValid &&
- (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isInt<6>(Imm) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6NonZero() const {
+ if (!isImm())
+ return false;
RISCVMCExpr::VariantKind VK;
int64_t Imm;
- bool IsValid;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- if (!IsConstantImm)
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- else
- IsValid = ((Imm != 0) && isInt<6>(Imm));
- return IsValid &&
- (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isInt<6>(Imm) && (Imm != 0) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isCLUIImm() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm != 0) &&
(isUInt<5>(Imm) || (Imm >= 0xfffe0 && Imm <= 0xfffff)) &&
- VK == RISCVMCExpr::VK_RISCV_None;
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm7Lsb00() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<5, 2>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm8Lsb00() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<6, 2>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm8Lsb000() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<5, 3>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -380,17 +431,21 @@ public:
bool isSImm9Lsb0() const { return isBareSimmNLsb0<9>(); }
bool isUImm9Lsb000() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<6, 3>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm10Lsb00NonZero() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<8, 2>(Imm) && (Imm != 0) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -401,54 +456,63 @@ public:
bool IsValid;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
else
IsValid = isInt<12>(Imm);
- return IsValid && (VK == RISCVMCExpr::VK_RISCV_None ||
+ return IsValid && ((IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None) ||
VK == RISCVMCExpr::VK_RISCV_LO ||
VK == RISCVMCExpr::VK_RISCV_PCREL_LO);
}
bool isSImm12Lsb0() const { return isBareSimmNLsb0<12>(); }
- bool isUImm12() const {
- int64_t Imm;
- RISCVMCExpr::VariantKind VK;
- if (!isImm())
- return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- return IsConstantImm && isUInt<12>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
- }
-
bool isSImm13Lsb0() const { return isBareSimmNLsb0<13>(); }
bool isSImm10Lsb0000NonZero() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm != 0) && isShiftedInt<6, 4>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
- bool isUImm20() const {
+ bool isUImm20LUI() const {
RISCVMCExpr::VariantKind VK;
int64_t Imm;
bool IsValid;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- if (!IsConstantImm)
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ if (!IsConstantImm) {
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- else
- IsValid = isUInt<20>(Imm);
- return IsValid && (VK == RISCVMCExpr::VK_RISCV_None ||
- VK == RISCVMCExpr::VK_RISCV_HI ||
- VK == RISCVMCExpr::VK_RISCV_PCREL_HI);
+ return IsValid && VK == RISCVMCExpr::VK_RISCV_HI;
+ } else {
+ return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
+ VK == RISCVMCExpr::VK_RISCV_HI);
+ }
+ }
+
+ bool isUImm20AUIPC() const {
+ RISCVMCExpr::VariantKind VK;
+ int64_t Imm;
+ bool IsValid;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ if (!IsConstantImm) {
+ IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
+ return IsValid && VK == RISCVMCExpr::VK_RISCV_PCREL_HI;
+ } else {
+ return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
+ VK == RISCVMCExpr::VK_RISCV_PCREL_HI);
+ }
}
- bool isSImm21Lsb0() const { return isBareSimmNLsb0<21>(); }
+ bool isSImm21Lsb0JAL() const { return isBareSimmNLsb0<21>(); }
/// getStartLoc - Gets location of the first token of this operand
SMLoc getStartLoc() const override { return StartLoc; }
@@ -462,6 +526,11 @@ public:
return Reg.RegNum;
}
+ StringRef getSysReg() const {
+ assert(Kind == SystemRegister && "Invalid access!");
+ return StringRef(SysReg.Data, SysReg.Length);
+ }
+
const MCExpr *getImm() const {
assert(Kind == Immediate && "Invalid type access!");
return Imm.Val;
@@ -484,6 +553,9 @@ public:
case Token:
OS << "'" << getToken() << "'";
break;
+ case SystemRegister:
+ OS << "<sysreg: " << getSysReg() << '>';
+ break;
}
}
@@ -517,16 +589,22 @@ public:
return Op;
}
+ static std::unique_ptr<RISCVOperand>
+ createSysReg(StringRef Str, SMLoc S, unsigned Encoding, bool IsRV64) {
+ auto Op = make_unique<RISCVOperand>(SystemRegister);
+ Op->SysReg.Data = Str.data();
+ Op->SysReg.Length = Str.size();
+ Op->SysReg.Encoding = Encoding;
+ Op->StartLoc = S;
+ Op->IsRV64 = IsRV64;
+ return Op;
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
assert(Expr && "Expr shouldn't be null!");
int64_t Imm = 0;
- bool IsConstant = false;
- if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
- IsConstant = RE->evaluateAsConstant(Imm);
- } else if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
- IsConstant = true;
- Imm = CE->getValue();
- }
+ RISCVMCExpr::VariantKind VK;
+ bool IsConstant = evaluateConstantImm(Expr, Imm, VK);
if (IsConstant)
Inst.addOperand(MCOperand::createImm(Imm));
@@ -553,16 +631,22 @@ public:
unsigned Imm = 0;
for (char c : SE->getSymbol().getName()) {
switch (c) {
- default: llvm_unreachable("FenceArg must contain only [iorw]");
- case 'i': Imm |= RISCVFenceField::I; break;
- case 'o': Imm |= RISCVFenceField::O; break;
- case 'r': Imm |= RISCVFenceField::R; break;
- case 'w': Imm |= RISCVFenceField::W; break;
+ default:
+ llvm_unreachable("FenceArg must contain only [iorw]");
+ case 'i': Imm |= RISCVFenceField::I; break;
+ case 'o': Imm |= RISCVFenceField::O; break;
+ case 'r': Imm |= RISCVFenceField::R; break;
+ case 'w': Imm |= RISCVFenceField::W; break;
}
}
Inst.addOperand(MCOperand::createImm(Imm));
}
+ void addCSRSystemRegisterOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(SysReg.Encoding));
+ }
+
// Returns the rounding mode represented by this RISCVOperand. Should only
// be called after checking isFRMArg.
RISCVFPRndMode::RoundingMode getRoundingMode() const {
@@ -590,40 +674,40 @@ public:
// information from TableGen.
unsigned convertFPR32ToFPR64(unsigned Reg) {
switch (Reg) {
- default:
- llvm_unreachable("Not a recognised FPR32 register");
- case RISCV::F0_32: return RISCV::F0_64;
- case RISCV::F1_32: return RISCV::F1_64;
- case RISCV::F2_32: return RISCV::F2_64;
- case RISCV::F3_32: return RISCV::F3_64;
- case RISCV::F4_32: return RISCV::F4_64;
- case RISCV::F5_32: return RISCV::F5_64;
- case RISCV::F6_32: return RISCV::F6_64;
- case RISCV::F7_32: return RISCV::F7_64;
- case RISCV::F8_32: return RISCV::F8_64;
- case RISCV::F9_32: return RISCV::F9_64;
- case RISCV::F10_32: return RISCV::F10_64;
- case RISCV::F11_32: return RISCV::F11_64;
- case RISCV::F12_32: return RISCV::F12_64;
- case RISCV::F13_32: return RISCV::F13_64;
- case RISCV::F14_32: return RISCV::F14_64;
- case RISCV::F15_32: return RISCV::F15_64;
- case RISCV::F16_32: return RISCV::F16_64;
- case RISCV::F17_32: return RISCV::F17_64;
- case RISCV::F18_32: return RISCV::F18_64;
- case RISCV::F19_32: return RISCV::F19_64;
- case RISCV::F20_32: return RISCV::F20_64;
- case RISCV::F21_32: return RISCV::F21_64;
- case RISCV::F22_32: return RISCV::F22_64;
- case RISCV::F23_32: return RISCV::F23_64;
- case RISCV::F24_32: return RISCV::F24_64;
- case RISCV::F25_32: return RISCV::F25_64;
- case RISCV::F26_32: return RISCV::F26_64;
- case RISCV::F27_32: return RISCV::F27_64;
- case RISCV::F28_32: return RISCV::F28_64;
- case RISCV::F29_32: return RISCV::F29_64;
- case RISCV::F30_32: return RISCV::F30_64;
- case RISCV::F31_32: return RISCV::F31_64;
+ default:
+ llvm_unreachable("Not a recognised FPR32 register");
+ case RISCV::F0_32: return RISCV::F0_64;
+ case RISCV::F1_32: return RISCV::F1_64;
+ case RISCV::F2_32: return RISCV::F2_64;
+ case RISCV::F3_32: return RISCV::F3_64;
+ case RISCV::F4_32: return RISCV::F4_64;
+ case RISCV::F5_32: return RISCV::F5_64;
+ case RISCV::F6_32: return RISCV::F6_64;
+ case RISCV::F7_32: return RISCV::F7_64;
+ case RISCV::F8_32: return RISCV::F8_64;
+ case RISCV::F9_32: return RISCV::F9_64;
+ case RISCV::F10_32: return RISCV::F10_64;
+ case RISCV::F11_32: return RISCV::F11_64;
+ case RISCV::F12_32: return RISCV::F12_64;
+ case RISCV::F13_32: return RISCV::F13_64;
+ case RISCV::F14_32: return RISCV::F14_64;
+ case RISCV::F15_32: return RISCV::F15_64;
+ case RISCV::F16_32: return RISCV::F16_64;
+ case RISCV::F17_32: return RISCV::F17_64;
+ case RISCV::F18_32: return RISCV::F18_64;
+ case RISCV::F19_32: return RISCV::F19_64;
+ case RISCV::F20_32: return RISCV::F20_64;
+ case RISCV::F21_32: return RISCV::F21_64;
+ case RISCV::F22_32: return RISCV::F22_64;
+ case RISCV::F23_32: return RISCV::F23_64;
+ case RISCV::F24_32: return RISCV::F24_64;
+ case RISCV::F25_32: return RISCV::F25_64;
+ case RISCV::F26_32: return RISCV::F26_64;
+ case RISCV::F27_32: return RISCV::F27_64;
+ case RISCV::F28_32: return RISCV::F28_64;
+ case RISCV::F29_32: return RISCV::F29_64;
+ case RISCV::F30_32: return RISCV::F30_64;
+ case RISCV::F31_32: return RISCV::F31_64;
}
}
@@ -663,7 +747,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
bool MatchingInlineAsm) {
MCInst Inst;
- switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
+ auto Result =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+ switch (Result) {
default:
break;
case Match_Success:
@@ -684,7 +770,21 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
return Error(ErrorLoc, "invalid operand for instruction");
}
- case Match_InvalidImmXLen:
+ }
+
+  // Handle the case where the error is of a specific type other than the
+  // generic Match_InvalidOperand, and the corresponding operand is missing.
+ if (Result > FIRST_TARGET_MATCH_RESULT_TY) {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U && ErrorInfo >= Operands.size())
+ return Error(ErrorLoc, "too few operands for instruction");
+ }
+
+ switch(Result) {
+ default:
+ break;
+ case Match_InvalidImmXLenLI:
if (isRV64()) {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a constant 64-bit integer");
@@ -706,8 +806,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5),
(1 << 5) - 1);
case Match_InvalidSImm6NonZero:
- return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5),
- (1 << 5) - 1,
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, -(1 << 5), (1 << 5) - 1,
"immediate must be non-zero in the range");
case Match_InvalidCLUIImm:
return generateImmOutOfRangeError(
@@ -742,24 +842,36 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Operands, ErrorInfo, -(1 << 9), (1 << 9) - 16,
"immediate must be a multiple of 16 bytes and non-zero in the range");
case Match_InvalidSImm12:
- return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 11),
- (1 << 11) - 1);
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, -(1 << 11), (1 << 11) - 1,
+ "operand must be a symbol with %lo/%pcrel_lo modifier or an integer in "
+ "the range");
case Match_InvalidSImm12Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 11), (1 << 11) - 2,
"immediate must be a multiple of 2 bytes in the range");
- case Match_InvalidUImm12:
- return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1);
case Match_InvalidSImm13Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 12), (1 << 12) - 2,
"immediate must be a multiple of 2 bytes in the range");
- case Match_InvalidUImm20:
- return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1);
- case Match_InvalidSImm21Lsb0:
+ case Match_InvalidUImm20LUI:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1,
+ "operand must be a symbol with %hi() "
+ "modifier or an integer in the range");
+ case Match_InvalidUImm20AUIPC:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 20) - 1,
+ "operand must be a symbol with %pcrel_hi() modifier or an integer in "
+ "the range");
+ case Match_InvalidSImm21Lsb0JAL:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 20), (1 << 20) - 2,
"immediate must be a multiple of 2 bytes in the range");
+ case Match_InvalidCSRSystemRegister: {
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1,
+ "operand must be a valid system register "
+ "name or an integer in the range");
+ }
case Match_InvalidFenceArg: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(
@@ -842,9 +954,9 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
return MatchOperand_Success;
}
-OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
+OperandMatchResultTy
+RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
const MCExpr *Res;
switch (getLexer().getKind()) {
@@ -854,18 +966,77 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Integer:
- case AsmToken::String:
+ case AsmToken::String: {
if (getParser().parseExpression(Res))
return MatchOperand_ParseFail;
- break;
+
+ auto *CE = dyn_cast<MCConstantExpr>(Res);
+ if (CE) {
+ int64_t Imm = CE->getValue();
+ if (isUInt<12>(Imm)) {
+ auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
+      // Accept an immediate representing a named or unnamed system register
+      // if it is in range, regardless of the required features.
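+      // E.g. an immediate of 0x001 is accepted (and identified as "fflags")
+      // even when the F extension is not enabled.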
+ Operands.push_back(RISCVOperand::createSysReg(
+ SysReg ? SysReg->Name : "", S, Imm, isRV64()));
+ return MatchOperand_Success;
+ }
+ }
+
+ Twine Msg = "immediate must be an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
case AsmToken::Identifier: {
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
- MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
- Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- break;
+
+ auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier);
+ // Accept a named Sys Reg if the required features are present.
+ if (SysReg) {
+ if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits())) {
+ Error(S, "system register use requires an option to be enabled");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(RISCVOperand::createSysReg(
+ Identifier, S, SysReg->Encoding, isRV64()));
+ return MatchOperand_Success;
+ }
+
+ Twine Msg = "operand must be a valid system register name "
+ "or an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
+ case AsmToken::Percent: {
+ // Discard operand with modifier.
+ Twine Msg = "immediate must be an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
}
+
+ return MatchOperand_NoMatch;
+}
+
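For illustration, a minimal standalone sketch of the 12-bit range check that parseCSRSystemRegister applies to the numeric operand form; the register and CSR values here are only examples, not taken from the patch.

// Sketch only: mirrors the isUInt<12> acceptance above for numeric CSR operands.
#include <cassert>
#include <cstdint>

static bool isValidCSREncoding(int64_t Imm) {
  // CSR numbers occupy the 12-bit immediate field of SYSTEM instructions,
  // so any value in [0, 4095] is accepted even without a known name.
  return Imm >= 0 && Imm <= (1 << 12) - 1;
}

int main() {
  assert(isValidCSREncoding(0x300));   // numeric form, e.g. csrr a0, 0x300
  assert(!isValidCSREncoding(0x1000)); // rejected: outside the 12-bit range
  return 0;
}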
+OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
+
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_NoMatch;
+ case AsmToken::LParen:
+ case AsmToken::Minus:
+ case AsmToken::Plus:
+ case AsmToken::Integer:
+ case AsmToken::String:
+ case AsmToken::Identifier:
+ if (getParser().parseExpression(Res))
+ return MatchOperand_ParseFail;
+ break;
case AsmToken::Percent:
return parseOperandWithModifier(Operands);
}
@@ -914,6 +1085,41 @@ RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
+
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return MatchOperand_NoMatch;
+
+ StringRef Identifier;
+ if (getParser().parseIdentifier(Identifier))
+ return MatchOperand_ParseFail;
+
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
+ // Parsing jal operands is fiddly due to the `jal foo` and `jal ra, foo`
+ // both being acceptable forms. When parsing `jal ra, foo` this function
+ // will be called for the `ra` register operand in an attempt to match the
+ // single-operand alias. parseJALOffset must fail for this case. It would
+ // seem logical to try to parse the operand using parseImmediate and return
+ // NoMatch if the next token is a comma (meaning we must be parsing a jal in
+ // the second form rather than the first). We can't do this as there's no
+ // way of rewinding the lexer state. Instead, return NoMatch if this operand
+ // is an identifier and is followed by a comma.
+ if (getLexer().is(AsmToken::Identifier) &&
+ getLexer().peekTok().is(AsmToken::Comma))
+ return MatchOperand_NoMatch;
+
+ return parseImmediate(Operands);
+}
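For reference, the two source forms the one-token lookahead distinguishes; the symbol name is an assumption for the example, and only the first form should reach parseJALOffset's immediate path.

// Illustrative inputs, kept as a C++ string so all examples stay in one language.
const char *JalForms =
    "  jal func\n"       // single-operand alias: offset parsed by parseJALOffset
    "  jal ra, func\n";  // explicit form: 'ra' is an identifier followed by a
                         // comma, so parseJALOffset returns NoMatch here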
+
OperandMatchResultTy
RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::LParen)) {
@@ -942,13 +1148,19 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
/// Looks at a token type and creates the relevant operand from this
/// information, adding to Operands. If operand was parsed, returns false, else
-/// true. If ForceImmediate is true, no attempt will be made to parse the
-/// operand as a register, which is needed for pseudoinstructions such as
-/// call.
-bool RISCVAsmParser::parseOperand(OperandVector &Operands,
- bool ForceImmediate) {
- // Attempt to parse token as register, unless ForceImmediate.
- if (!ForceImmediate && parseRegister(Operands, true) == MatchOperand_Success)
+/// true.
+bool RISCVAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy Result =
+ MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/true);
+ if (Result == MatchOperand_Success)
+ return false;
+ if (Result == MatchOperand_ParseFail)
+ return true;
+
+ // Attempt to parse token as a register.
+ if (parseRegister(Operands, true) == MatchOperand_Success)
return false;
// Attempt to parse token as an immediate
@@ -967,6 +1179,21 @@ bool RISCVAsmParser::parseOperand(OperandVector &Operands,
bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
+ // Ensure that if the instruction occurs when relaxation is enabled,
+ // relocations are forced for the file. Ideally this would be done when there
+ // is enough information to reliably determine if the instruction itself may
+ // cause relaxations. Unfortunately the instruction processing stage occurs in the
+ // same pass as relocation emission, so it's too late to set a 'sticky bit'
+ // for the entire file.
+ if (getSTI().getFeatureBits()[RISCV::FeatureRelax]) {
+ auto *Assembler = getTargetStreamer().getStreamer().getAssemblerPtr();
+ if (Assembler != nullptr) {
+ RISCVAsmBackend &MAB =
+ static_cast<RISCVAsmBackend &>(Assembler->getBackend());
+ MAB.setForceRelocs();
+ }
+ }
+
// First operand is token for instruction
Operands.push_back(RISCVOperand::createToken(Name, NameLoc, isRV64()));
@@ -975,18 +1202,20 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
// Parse first operand
- bool ForceImmediate = (Name == "call" || Name == "tail");
- if (parseOperand(Operands, ForceImmediate))
+ if (parseOperand(Operands, Name))
return true;
// Parse until end of statement, consuming commas between operands
+ unsigned OperandIdx = 1;
while (getLexer().is(AsmToken::Comma)) {
// Consume comma token
getLexer().Lex();
// Parse next operand
- if (parseOperand(Operands, false))
+ if (parseOperand(Operands, Name))
return true;
+
+ ++OperandIdx;
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -1068,6 +1297,33 @@ bool RISCVAsmParser::parseDirectiveOption() {
StringRef Option = Tok.getIdentifier();
+ if (Option == "push") {
+ getTargetStreamer().emitDirectiveOptionPush();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ pushFeatureBits();
+ return false;
+ }
+
+ if (Option == "pop") {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ getTargetStreamer().emitDirectiveOptionPop();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ if (popFeatureBits())
+ return Error(StartLoc, ".option pop with no .option push");
+
+ return false;
+ }
+
if (Option == "rvc") {
getTargetStreamer().emitDirectiveOptionRVC();
@@ -1092,9 +1348,34 @@ bool RISCVAsmParser::parseDirectiveOption() {
return false;
}
+ if (Option == "relax") {
+ getTargetStreamer().emitDirectiveOptionRelax();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ setFeatureBits(RISCV::FeatureRelax, "relax");
+ return false;
+ }
+
+ if (Option == "norelax") {
+ getTargetStreamer().emitDirectiveOptionNoRelax();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ clearFeatureBits(RISCV::FeatureRelax, "relax");
+ return false;
+ }
+
// Unknown option.
Warning(Parser.getTok().getLoc(),
- "unknown option, expected 'rvc' or 'norvc'");
+ "unknown option, expected 'push', 'pop', 'rvc', 'norvc', 'relax' or "
+ "'norelax'");
Parser.eatToEndOfStatement();
return false;
}
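As a usage illustration of the new directives, a fragment that disables compression and relaxation for a region and then restores the previous state; the surrounding code and the effect comments are assumptions, not part of the patch.

// Shown as a C++ string purely to keep one language for the examples.
const char *OptionExample =
    "  .option push\n"     // save the current rvc/relax feature state
    "  .option norvc\n"    // no compressed encodings in this region
    "  .option norelax\n"  // and no linker relaxation either
    "  nop\n"
    "  .option pop\n";     // restore whatever the matching push saved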
@@ -1108,80 +1389,54 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
void RISCVAsmParser::emitLoadImm(unsigned DestReg, int64_t Value,
MCStreamer &Out) {
- if (isInt<32>(Value)) {
- // Emits the MC instructions for loading a 32-bit constant into a register.
- //
- // Depending on the active bits in the immediate Value v, the following
- // instruction sequences are emitted:
- //
- // v == 0 : ADDI(W)
- // v[0,12) != 0 && v[12,32) == 0 : ADDI(W)
- // v[0,12) == 0 && v[12,32) != 0 : LUI
- // v[0,32) != 0 : LUI+ADDI(W)
- //
- int64_t Hi20 = ((Value + 0x800) >> 12) & 0xFFFFF;
- int64_t Lo12 = SignExtend64<12>(Value);
- unsigned SrcReg = RISCV::X0;
-
- if (Hi20) {
- emitToStreamer(Out,
- MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Hi20));
- SrcReg = DestReg;
+ RISCVMatInt::InstSeq Seq;
+ RISCVMatInt::generateInstSeq(Value, isRV64(), Seq);
+
+ unsigned SrcReg = RISCV::X0;
+ for (RISCVMatInt::Inst &Inst : Seq) {
+ if (Inst.Opc == RISCV::LUI) {
+ emitToStreamer(
+ Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm));
+ } else {
+ emitToStreamer(
+ Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
+ Inst.Imm));
}
- if (Lo12 || Hi20 == 0) {
- unsigned AddiOpcode =
- STI->hasFeature(RISCV::Feature64Bit) ? RISCV::ADDIW : RISCV::ADDI;
- emitToStreamer(Out, MCInstBuilder(AddiOpcode)
- .addReg(DestReg)
- .addReg(SrcReg)
- .addImm(Lo12));
- }
- return;
- }
- assert(STI->hasFeature(RISCV::Feature64Bit) &&
- "Target must be 64-bit to support a >32-bit constant");
-
- // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
- // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emmitted. Note
- // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
- // while the following ADDI instructions contribute up to 12 bits each.
- //
- // On the first glance, implementing this seems to be possible by simply
- // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left
- // shift (SLLI) and immediate additions (ADDI) as needed. However, due to the
- // fact that ADDI performs a sign extended addition, doing it like that would
- // only be possible when at most 11 bits of the ADDI instructions are used.
- // Using all 12 bits of the ADDI instructions, like done by GAS, actually
- // requires that the constant is processed starting with the least significant
- // bit.
- //
- // In the following, constants are processed from LSB to MSB but instruction
- // emission is performed from MSB to LSB by recursively calling
- // emitLoadImm. In each recursion, first the lowest 12 bits are removed
- // from the constant and the optimal shift amount, which can be greater than
- // 12 bits if the constant is sparse, is determined. Then, the shifted
- // remaining constant is processed recursively and gets emitted as soon as it
- // fits into 32 bits. The emission of the shifts and additions is subsequently
- // performed when the recursion returns.
- //
- int64_t Lo12 = SignExtend64<12>(Value);
- int64_t Hi52 = (Value + 0x800) >> 12;
- int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
- Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
-
- emitLoadImm(DestReg, Hi52, Out);
-
- emitToStreamer(Out, MCInstBuilder(RISCV::SLLI)
- .addReg(DestReg)
- .addReg(DestReg)
- .addImm(ShiftAmount));
-
- if (Lo12)
- emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
- .addReg(DestReg)
- .addReg(DestReg)
- .addImm(Lo12));
+ // Only the first instruction has X0 as its source.
+ SrcReg = DestReg;
+ }
+}
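To make the decomposition concrete, a small standalone check of the LUI/ADDI split that RISCVMatInt::generateInstSeq produces for a simple 32-bit constant; the value is arbitrary, and the +0x800 rounding compensates for the sign extension of the low 12 bits.

// Sketch only: verifies that lui Hi20 followed by addi Lo12 rebuilds Value.
#include <cassert>
#include <cstdint>

int main() {
  int64_t Value = 0x12345FFF;
  int64_t Hi20 = ((Value + 0x800) >> 12) & 0xFFFFF;
  int64_t Lo12 = (Value & 0xFFF) - ((Value & 0x800) ? 0x1000 : 0);
  // Emitted as: lui a0, Hi20 ; addi a0, a0, Lo12
  assert((Hi20 << 12) + Lo12 == Value);
  return 0;
}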
+
+void RISCVAsmParser::emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out) {
+ // The local load address pseudo-instruction "lla" is used in PC-relative
+ // addressing of symbols:
+ // lla rdest, symbol
+ // expands to
+ // TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
+ //             ADDI rdest, rdest, %pcrel_lo(TmpLabel)
+ MCContext &Ctx = getContext();
+
+ MCSymbol *TmpLabel = Ctx.createTempSymbol(
+ "pcrel_hi", /* AlwaysAddSuffix */ true, /* CanBeUnnamed */ false);
+ Out.EmitLabel(TmpLabel);
+
+ MCOperand DestReg = Inst.getOperand(0);
+ const RISCVMCExpr *Symbol = RISCVMCExpr::create(
+ Inst.getOperand(1).getExpr(), RISCVMCExpr::VK_RISCV_PCREL_HI, Ctx);
+
+ emitToStreamer(
+ Out, MCInstBuilder(RISCV::AUIPC).addOperand(DestReg).addExpr(Symbol));
+
+ const MCExpr *RefToLinkTmpLabel =
+ RISCVMCExpr::create(MCSymbolRefExpr::create(TmpLabel, Ctx),
+ RISCVMCExpr::VK_RISCV_PCREL_LO, Ctx);
+
+ emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
+ .addOperand(DestReg)
+ .addOperand(DestReg)
+ .addExpr(RefToLinkTmpLabel));
}
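For reference, the expansion the code above emits, written out as assembly; the label and register names are assumptions, since LLVM chooses the temporary label itself.

// Kept as a C++ string so all examples stay in one language.
const char *LlaExpansion =
    ".Lpcrel_hi0:\n"
    "  auipc a0, %pcrel_hi(sym)\n"
    "  addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)\n";  // expansion of: lla a0, sym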
bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
@@ -1189,7 +1444,17 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Inst.setLoc(IDLoc);
if (Inst.getOpcode() == RISCV::PseudoLI) {
- auto Reg = Inst.getOperand(0).getReg();
+ unsigned Reg = Inst.getOperand(0).getReg();
+ const MCOperand &Op1 = Inst.getOperand(1);
+ if (Op1.isExpr()) {
+ // We must have li reg, %lo(sym) or li reg, %pcrel_lo(sym) or similar.
+ // Just convert to an addi. This allows compatibility with gas.
+ emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
+ .addReg(Reg)
+ .addReg(RISCV::X0)
+ .addExpr(Op1.getExpr()));
+ return false;
+ }
int64_t Imm = Inst.getOperand(1).getImm();
// On RV32 the immediate here can either be a signed or an unsigned
// 32-bit number. Sign extension has to be performed to ensure that Imm
@@ -1198,6 +1463,9 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Imm = SignExtend64<32>(Imm);
emitLoadImm(Reg, Imm, Out);
return false;
+ } else if (Inst.getOpcode() == RISCV::PseudoLLA) {
+ emitLoadLocalAddress(Inst, IDLoc, Out);
+ return false;
}
emitToStreamer(Out, Inst);
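A short illustration of the gas-compatible form accepted above and the rewrite it receives; the symbol name is an assumption.

// "li rd, %lo(sym)" carries no materializable constant, so it is lowered to a
// plain addi from x0 instead of going through emitLoadImm.
const char *LiWithModifier =
    "  li   a0, %lo(sym)\n";  // emitted as: addi a0, zero, %lo(sym)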
diff --git a/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 7bbb371a757f..eafa09d56315 100644
--- a/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -211,6 +212,15 @@ static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
}
template <unsigned N>
+static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ if (Imm == 0)
+ return MCDisassembler::Fail;
+ return decodeUImmOperand<N>(Inst, Imm, Address, Decoder);
+}
+
+template <unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
@@ -221,6 +231,15 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
}
template <unsigned N>
+static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ if (Imm == 0)
+ return MCDisassembler::Fail;
+ return decodeSImmOperand<N>(Inst, Imm, Address, Decoder);
+}
+
+template <unsigned N>
static DecodeStatus decodeSImmOperandAndLsl1(MCInst &Inst, uint64_t Imm,
int64_t Address,
const void *Decoder) {
@@ -243,6 +262,17 @@ static DecodeStatus decodeCLUIImmOperand(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ assert(isUInt<3>(Imm) && "Invalid immediate");
+ if (!llvm::RISCVFPRndMode::isValidRoundingMode(Imm))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
+
#include "RISCVGenDisassemblerTables.inc"
DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
@@ -257,11 +287,19 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// It's a 32 bit instruction if bit 0 and 1 are 1.
if ((Bytes[0] & 0x3) == 0x3) {
+ if (Bytes.size() < 4) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
Insn = support::endian::read32le(Bytes.data());
LLVM_DEBUG(dbgs() << "Trying RISCV32 table :\n");
Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
Size = 4;
} else {
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
Insn = support::endian::read16le(Bytes.data());
if (!STI.getFeatureBits()[RISCV::Feature64Bit]) {
diff --git a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
index 300e6fd9750a..979c8f4e2fa7 100644
--- a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "RISCVInstPrinter.h"
-#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCExpr.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -36,10 +36,9 @@ using namespace llvm;
#include "RISCVGenCompressInstEmitter.inc"
static cl::opt<bool>
-NoAliases("riscv-no-aliases",
- cl::desc("Disable the emission of assembler pseudo instructions"),
- cl::init(false),
- cl::Hidden);
+ NoAliases("riscv-no-aliases",
+ cl::desc("Disable the emission of assembler pseudo instructions"),
+ cl::init(false), cl::Hidden);
void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot, const MCSubtargetInfo &STI) {
@@ -49,7 +48,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (!NoAliases)
Res = uncompressInst(UncompressedMI, *MI, MRI, STI);
if (Res)
- NewMI = const_cast<MCInst*>(&UncompressedMI);
+ NewMI = const_cast<MCInst *>(&UncompressedMI);
if (NoAliases || !printAliasInstr(NewMI, STI, O))
printInstruction(NewMI, STI, O);
printAnnotation(O, Annot);
@@ -60,8 +59,8 @@ void RISCVInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
}
void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O, const char *Modifier) {
+ const MCSubtargetInfo &STI, raw_ostream &O,
+ const char *Modifier) {
assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
const MCOperand &MO = MI->getOperand(OpNo);
@@ -79,10 +78,23 @@ void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
MO.getExpr()->print(O, &MAI);
}
+void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
+ if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
+ O << SysReg->Name;
+ else
+ O << Imm;
+}
+
void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned FenceArg = MI->getOperand(OpNo).getImm();
+ assert(((FenceArg >> 4) == 0) && "Invalid immediate in printFenceArg");
+
if ((FenceArg & RISCVFenceField::I) != 0)
O << 'i';
if ((FenceArg & RISCVFenceField::O) != 0)
@@ -91,11 +103,12 @@ void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
O << 'r';
if ((FenceArg & RISCVFenceField::W) != 0)
O << 'w';
+ if (FenceArg == 0)
+ O << "unknown";
}
void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
auto FRMArg =
static_cast<RISCVFPRndMode::RoundingMode>(MI->getOperand(OpNo).getImm());
O << RISCVFPRndMode::roundingModeToString(FRMArg);
diff --git a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
index 241be8daf113..0f9bed184996 100644
--- a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
+++ b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
@@ -32,6 +32,8 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O, const char *Modifier = nullptr);
+ void printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printFenceArg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -48,6 +50,6 @@ public:
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = RISCV::ABIRegAltName);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 9ba7ebd0eb0f..7672fea5d95b 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -7,114 +7,58 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/RISCVFixupKinds.h"
-#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCVAsmBackend.h"
+#include "RISCVMCExpr.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
-class RISCVAsmBackend : public MCAsmBackend {
- const MCSubtargetInfo &STI;
- uint8_t OSABI;
- bool Is64Bit;
+// If linker relaxation is enabled, or the relax option had previously been
+// enabled, always emit relocations even if the fixup can be resolved. This is
+// necessary for correctness as offsets may change during relaxation.
+bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
+ bool ShouldForce = false;
-public:
- RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
- Is64Bit(Is64Bit) {}
- ~RISCVAsmBackend() override {}
-
- // Generate diff expression relocations if the relax feature is enabled,
- // otherwise it is safe for the assembler to calculate these internally.
- bool requiresDiffExpressionRelocations() const override {
- return STI.getFeatureBits()[RISCV::FeatureRelax];
- }
- void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsResolved,
- const MCSubtargetInfo *STI) const override;
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override;
-
- // If linker relaxation is enabled, always emit relocations even if the fixup
- // can be resolved. This is necessary for correctness as offsets may change
- // during relaxation.
- bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override {
- return STI.getFeatureBits()[RISCV::FeatureRelax];
- }
-
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
- llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
- }
-
- bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
- uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout,
- const bool WasForced) const override;
-
- unsigned getNumFixupKinds() const override {
- return RISCV::NumTargetFixupKinds;
- }
-
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
- const static MCFixupKindInfo Infos[] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
- // RISCVFixupKinds.h.
- //
- // name offset bits flags
- { "fixup_riscv_hi20", 12, 20, 0 },
- { "fixup_riscv_lo12_i", 20, 12, 0 },
- { "fixup_riscv_lo12_s", 0, 32, 0 },
- { "fixup_riscv_pcrel_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_pcrel_lo12_i", 20, 12, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_pcrel_lo12_s", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_jal", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_relax", 0, 0, 0 }
- };
- static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
- "Not all fixup kinds added to Infos array");
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ break;
+ case RISCV::fixup_riscv_pcrel_lo12_i:
+ case RISCV::fixup_riscv_pcrel_lo12_s:
+ // For pcrel_lo12, force a relocation if the target of the corresponding
+ // pcrel_hi20 is not in the same fragment.
+ const MCFixup *T = cast<RISCVMCExpr>(Fixup.getValue())->getPCRelHiFixup();
+ if (!T) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "could not find corresponding %pcrel_hi");
+ return false;
+ }
+
+ switch ((unsigned)T->getKind()) {
+ default:
+ llvm_unreachable("Unexpected fixup kind for pcrel_lo12");
+ break;
+ case RISCV::fixup_riscv_pcrel_hi20:
+ ShouldForce = T->getValue()->findAssociatedFragment() !=
+ Fixup.getValue()->findAssociatedFragment();
+ break;
+ }
+ break;
}
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override;
- unsigned getRelaxedOpcode(unsigned Op) const;
-
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
-
-
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
-};
-
+ return ShouldForce || STI.getFeatureBits()[RISCV::FeatureRelax] ||
+ ForceRelocs;
+}
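A small example of why relocations must always be emitted once relaxation is, or ever was, enabled: the assembler-computed distance below would go stale if the linker later shrinks the call. The symbol names are assumptions.

// Kept as a C++ string so all examples stay in one language.
const char *WhyForceRelocs =
    "  .option relax\n"
    ".Lbegin:\n"
    "  call func\n"               // auipc+jalr today, possibly jal after relaxation
    ".Lend:\n"
    "  .word .Lend - .Lbegin\n";  // must stay a relocation, not a folded constant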
bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
bool Resolved,
@@ -348,8 +292,6 @@ RISCVAsmBackend::createObjectTargetWriter() const {
return createRISCVELFObjectWriter(OSABI, Is64Bit);
}
-} // end anonymous namespace
-
MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
new file mode 100644
index 000000000000..b98e45f4053f
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -0,0 +1,113 @@
+//===-- RISCVAsmBackend.h - RISCV Assembler Backend -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVASMBACKEND_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVASMBACKEND_H
+
+#include "MCTargetDesc/RISCVFixupKinds.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+class MCAssembler;
+class MCObjectTargetWriter;
+class raw_ostream;
+
+class RISCVAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo &STI;
+ uint8_t OSABI;
+ bool Is64Bit;
+ bool ForceRelocs = false;
+
+public:
+ RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
+ : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
+ Is64Bit(Is64Bit) {}
+ ~RISCVAsmBackend() override {}
+
+ void setForceRelocs() { ForceRelocs = true; }
+
+ // Generate diff expression relocations if the relax feature is enabled or had
+ // previously been enabled, otherwise it is safe for the assembler to
+ // calculate these internally.
+ bool requiresDiffExpressionRelocations() const override {
+ return STI.getFeatureBits()[RISCV::FeatureRelax] || ForceRelocs;
+ }
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override;
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override;
+
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
+ }
+
+ bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout,
+ const bool WasForced) const override;
+
+ unsigned getNumFixupKinds() const override {
+ return RISCV::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo Infos[] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // RISCVFixupKinds.h.
+ //
+ // name offset bits flags
+ { "fixup_riscv_hi20", 12, 20, 0 },
+ { "fixup_riscv_lo12_i", 20, 12, 0 },
+ { "fixup_riscv_lo12_s", 0, 32, 0 },
+ { "fixup_riscv_pcrel_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_pcrel_lo12_i", 20, 12, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_pcrel_lo12_s", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_jal", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_relax", 0, 0, 0 }
+ };
+ static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
+ "Not all fixup kinds added to Infos array");
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
+ unsigned getRelaxedOpcode(unsigned Op) const;
+
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override;
+
+
+ bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+};
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 6428b11cfe9c..a6ba1e41e964 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -38,5 +38,9 @@ MCELFStreamer &RISCVTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
+void RISCVTargetELFStreamer::emitDirectiveOptionPush() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionPop() {}
void RISCVTargetELFStreamer::emitDirectiveOptionRVC() {}
void RISCVTargetELFStreamer::emitDirectiveOptionNoRVC() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionRelax() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionNoRelax() {}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index daa7abfe1336..1f36bbc43882 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -20,8 +20,12 @@ public:
MCELFStreamer &getStreamer();
RISCVTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+ virtual void emitDirectiveOptionPush();
+ virtual void emitDirectiveOptionPop();
virtual void emitDirectiveOptionRVC();
virtual void emitDirectiveOptionNoRVC();
+ virtual void emitDirectiveOptionRelax();
+ virtual void emitDirectiveOptionNoRelax();
};
}
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 8a796a014b33..c5a4ffc0e360 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVFixupKinds.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -196,7 +196,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
MCInstrDesc const &Desc = MCII.get(MI.getOpcode());
unsigned MIFrm = Desc.TSFlags & RISCVII::InstFormatMask;
- // If the destination is an immediate, there is nothing to do
+ // If the destination is an immediate, there is nothing to do.
if (MO.isImm())
return MO.getImm();
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index 085dcd4e5f66..53648a5922c8 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -14,12 +14,12 @@
#include "RISCV.h"
#include "RISCVMCExpr.h"
+#include "RISCVFixupKinds.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/ELF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -41,9 +41,90 @@ void RISCVMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
OS << ')';
}
+const MCFixup *RISCVMCExpr::getPCRelHiFixup() const {
+ MCValue AUIPCLoc;
+ if (!getSubExpr()->evaluateAsRelocatable(AUIPCLoc, nullptr, nullptr))
+ return nullptr;
+
+ const MCSymbolRefExpr *AUIPCSRE = AUIPCLoc.getSymA();
+ if (!AUIPCSRE)
+ return nullptr;
+
+ const auto *DF =
+ dyn_cast_or_null<MCDataFragment>(AUIPCSRE->findAssociatedFragment());
+ if (!DF)
+ return nullptr;
+
+ const MCSymbol *AUIPCSymbol = &AUIPCSRE->getSymbol();
+ for (const MCFixup &F : DF->getFixups()) {
+ if (F.getOffset() != AUIPCSymbol->getOffset())
+ continue;
+
+ switch ((unsigned)F.getKind()) {
+ default:
+ continue;
+ case RISCV::fixup_riscv_pcrel_hi20:
+ return &F;
+ }
+ }
+
+ return nullptr;
+}
+
+bool RISCVMCExpr::evaluatePCRelLo(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ // VK_RISCV_PCREL_LO has to be handled specially. The MCExpr inside is
+ // actually the location of an auipc instruction with a VK_RISCV_PCREL_HI fixup
+ // pointing to the real target. We need to generate an MCValue in the form of
+ // (<real target> + <offset from this fixup to the auipc fixup>). The Fixup
+ // is pcrel relative to the VK_RISCV_PCREL_LO fixup, so we need to add the
+ // offset from the VK_RISCV_PCREL_LO fixup to the VK_RISCV_PCREL_HI fixup to
+ // correct for this.
+ MCValue AUIPCLoc;
+ if (!getSubExpr()->evaluateAsValue(AUIPCLoc, *Layout))
+ return false;
+
+ const MCSymbolRefExpr *AUIPCSRE = AUIPCLoc.getSymA();
+ // Don't try to evaluate %pcrel_hi/%pcrel_lo pairs that cross fragment
+ // boundaries.
+ if (!AUIPCSRE ||
+ findAssociatedFragment() != AUIPCSRE->findAssociatedFragment())
+ return false;
+
+ const MCSymbol *AUIPCSymbol = &AUIPCSRE->getSymbol();
+ if (!AUIPCSymbol)
+ return false;
+
+ const MCFixup *TargetFixup = getPCRelHiFixup();
+ if (!TargetFixup)
+ return false;
+
+ if ((unsigned)TargetFixup->getKind() != RISCV::fixup_riscv_pcrel_hi20)
+ return false;
+
+ MCValue Target;
+ if (!TargetFixup->getValue()->evaluateAsValue(Target, *Layout))
+ return false;
+
+ if (!Target.getSymA() || !Target.getSymA()->getSymbol().isInSection())
+ return false;
+
+ if (&Target.getSymA()->getSymbol().getSection() !=
+ findAssociatedFragment()->getParent())
+ return false;
+
+ uint64_t AUIPCOffset = AUIPCSymbol->getOffset();
+
+ Res = MCValue::get(Target.getSymA(), nullptr,
+ Target.getConstant() + (Fixup->getOffset() - AUIPCOffset));
+ return true;
+}
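To ground the pairing that getPCRelHiFixup and evaluatePCRelLo walk, an example where the %pcrel_lo operand names the label on the auipc rather than the final symbol; the label, register, and symbol names are assumptions.

// Kept as a C++ string so all examples stay in one language.
const char *PcrelPair =
    ".Lpcrel_hi1:\n"
    "  auipc a1, %pcrel_hi(var)\n"               // carries fixup_riscv_pcrel_hi20
    "  lw    a1, %pcrel_lo(.Lpcrel_hi1)(a1)\n";  // lo12 fixup resolved against it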
+
bool RISCVMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
+ if (Kind == VK_RISCV_PCREL_LO && evaluatePCRelLo(Res, Layout, Fixup))
+ return true;
+
if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
return false;
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
index d2e0f6b6cdae..4eafcc08b51f 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
@@ -39,6 +39,9 @@ private:
int64_t evaluateAsInt64(int64_t Value) const;
+ bool evaluatePCRelLo(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const;
+
explicit RISCVMCExpr(const MCExpr *Expr, VariantKind Kind)
: Expr(Expr), Kind(Kind) {}
@@ -50,6 +53,13 @@ public:
const MCExpr *getSubExpr() const { return Expr; }
+ /// Get the MCExpr of the VK_RISCV_PCREL_HI Fixup that the
+ /// VK_RISCV_PCREL_LO points to.
+ ///
+ /// \returns nullptr if this isn't a VK_RISCV_PCREL_LO pointing to a
+ /// VK_RISCV_PCREL_HI.
+ const MCFixup *getPCRelHiFixup() const;
+
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 2d5205aa7ef7..8d5ef3dbd17f 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -23,6 +23,14 @@ RISCVTargetAsmStreamer::RISCVTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
: RISCVTargetStreamer(S), OS(OS) {}
+void RISCVTargetAsmStreamer::emitDirectiveOptionPush() {
+ OS << "\t.option\tpush\n";
+}
+
+void RISCVTargetAsmStreamer::emitDirectiveOptionPop() {
+ OS << "\t.option\tpop\n";
+}
+
void RISCVTargetAsmStreamer::emitDirectiveOptionRVC() {
OS << "\t.option\trvc\n";
}
@@ -30,3 +38,11 @@ void RISCVTargetAsmStreamer::emitDirectiveOptionRVC() {
void RISCVTargetAsmStreamer::emitDirectiveOptionNoRVC() {
OS << "\t.option\tnorvc\n";
}
+
+void RISCVTargetAsmStreamer::emitDirectiveOptionRelax() {
+ OS << "\t.option\trelax\n";
+}
+
+void RISCVTargetAsmStreamer::emitDirectiveOptionNoRelax() {
+ OS << "\t.option\tnorelax\n";
+}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 525c20810f24..74ec9e303933 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -18,8 +18,12 @@ class RISCVTargetStreamer : public MCTargetStreamer {
public:
RISCVTargetStreamer(MCStreamer &S);
+ virtual void emitDirectiveOptionPush() = 0;
+ virtual void emitDirectiveOptionPop() = 0;
virtual void emitDirectiveOptionRVC() = 0;
virtual void emitDirectiveOptionNoRVC() = 0;
+ virtual void emitDirectiveOptionRelax() = 0;
+ virtual void emitDirectiveOptionNoRelax() = 0;
};
// This part is for ascii assembly output
@@ -29,8 +33,12 @@ class RISCVTargetAsmStreamer : public RISCVTargetStreamer {
public:
RISCVTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+ void emitDirectiveOptionPush() override;
+ void emitDirectiveOptionPop() override;
void emitDirectiveOptionRVC() override;
void emitDirectiveOptionNoRVC() override;
+ void emitDirectiveOptionRelax() override;
+ void emitDirectiveOptionNoRelax() override;
};
}
diff --git a/contrib/llvm/lib/Target/RISCV/RISCV.h b/contrib/llvm/lib/Target/RISCV/RISCV.h
index 2e4f536aca35..b25aee46200d 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCV.h
+++ b/contrib/llvm/lib/Target/RISCV/RISCV.h
@@ -15,7 +15,8 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCV_H
#define LLVM_LIB_TARGET_RISCV_RISCV_H
-#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "Utils/RISCVBaseInfo.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
class RISCVTargetMachine;
@@ -36,6 +37,9 @@ FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
FunctionPass *createRISCVMergeBaseOffsetOptPass();
void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
+
+FunctionPass *createRISCVExpandPseudoPass();
+void initializeRISCVExpandPseudoPass(PassRegistry &);
}
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm/lib/Target/RISCV/RISCV.td
index 281378cb2eee..0e86e2bc5e98 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCV.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCV.td
@@ -68,6 +68,12 @@ include "RISCVCallingConv.td"
include "RISCVInstrInfo.td"
//===----------------------------------------------------------------------===//
+// Named operands for CSR instructions.
+//===----------------------------------------------------------------------===//
+
+include "RISCVSystemOperands.td"
+
+//===----------------------------------------------------------------------===//
// RISC-V processors supported.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
new file mode 100644
index 000000000000..35c185aa5edd
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -0,0 +1,556 @@
+//===-- RISCVExpandPseudoInsts.cpp - Expand pseudo instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVTargetMachine.h"
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define RISCV_EXPAND_PSEUDO_NAME "RISCV pseudo instruction expansion pass"
+
+namespace {
+
+class RISCVExpandPseudo : public MachineFunctionPass {
+public:
+ const RISCVInstrInfo *TII;
+ static char ID;
+
+ RISCVExpandPseudo() : MachineFunctionPass(ID) {
+ initializeRISCVExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return RISCV_EXPAND_PSEUDO_NAME; }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicBinOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
+ bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, bool IsMasked,
+ int Width, MachineBasicBlock::iterator &NextMBBI);
+};
+
+char RISCVExpandPseudo::ID = 0;
+
+bool RISCVExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+ return Modified;
+}
+
+bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ switch (MBBI->getOpcode()) {
+ case RISCV::PseudoAtomicLoadNand32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicSwap32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadAdd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadSub32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadNand32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
+ NextMBBI);
+ case RISCV::PseudoCmpXchg32:
+ return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
+ case RISCV::PseudoMaskedCmpXchg32:
+ return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+ }
+
+ return false;
+}
+
+static unsigned getLRForRMW32(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::LR_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::LR_W_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::LR_W;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::LR_W_AQ;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::LR_W_AQ_RL;
+ }
+}
+
+static unsigned getSCForRMW32(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::SC_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::SC_W;
+ case AtomicOrdering::Release:
+ return RISCV::SC_W_RL;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::SC_W_RL;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::SC_W_AQ_RL;
+ }
+}
+
+static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
+ DebugLoc DL, MachineBasicBlock *ThisMBB,
+ MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB,
+ AtomicRMWInst::BinOp BinOp, int Width) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned IncrReg = MI.getOperand(3).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
+
+ // .loop:
+ // lr.w dest, (addr)
+ // binop scratch, dest, val
+ // sc.w scratch, scratch, (addr)
+ // bnez scratch, loop
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Nand:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-1);
+ break;
+ }
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopMBB);
+}
+
+static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, unsigned DestReg,
+ unsigned OldValReg, unsigned NewValReg,
+ unsigned MaskReg, unsigned ScratchReg) {
+ assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
+ assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
+ assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
+
+ // We select bits from newval and oldval using:
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
+ // r = oldval ^ ((oldval ^ newval) & masktargetdata);
+ BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
+ .addReg(OldValReg)
+ .addReg(NewValReg);
+ BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(MaskReg);
+ BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
+ .addReg(OldValReg)
+ .addReg(ScratchReg);
+}
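A standalone check of the masked-merge identity the helper relies on: bits selected by the mask come from the new value and all other bits come from the old value. The constants are arbitrary.

// Sketch only: checks r = old ^ ((old ^ new) & mask) selects bits as described.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t OldVal = 0xAABBCCDD, NewVal = 0x11223344, Mask = 0x0000FF00;
  uint32_t Merged = OldVal ^ ((OldVal ^ NewVal) & Mask);
  assert(Merged == ((OldVal & ~Mask) | (NewVal & Mask)));
  return 0;
}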
+
+static void doMaskedAtomicBinOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned IncrReg = MI.getOperand(3).getReg();
+ unsigned MaskReg = MI.getOperand(4).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
+
+ // .loop:
+ // lr.w destreg, (alignedaddr)
+ // binop scratch, destreg, incr
+ // xor scratch, destreg, scratch
+ // and scratch, scratch, masktargetdata
+ // xor scratch, destreg, scratch
+ // sc.w scratch, scratch, (alignedaddr)
+ // bnez scratch, loop
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(RISCV::X0)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Add:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Sub:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Nand:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-1);
+ break;
+ }
+
+ insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
+ ScratchReg);
+
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopMBB);
+}
+
+bool RISCVExpandPseudo::expandAtomicBinOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineFunction *MF = MBB.getParent();
+ auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopMBB);
+ MF->insert(++LoopMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopMBB);
+
+ if (!IsMasked)
+ doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
+ else
+ doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
+ Width);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, unsigned ValReg,
+ unsigned ShamtReg) {
+ BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+ BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+}
+
+bool RISCVExpandPseudo::expandAtomicMinMaxOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ assert(IsMasked == true &&
+ "Should only need to expand masked atomic max/min");
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+ MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Scratch1Reg = MI.getOperand(1).getReg();
+ unsigned Scratch2Reg = MI.getOperand(2).getReg();
+ unsigned AddrReg = MI.getOperand(3).getReg();
+ unsigned IncrReg = MI.getOperand(4).getReg();
+ unsigned MaskReg = MI.getOperand(5).getReg();
+ bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
+
+ //
+ // .loophead:
+ // lr.w destreg, (alignedaddr)
+ // and scratch2, destreg, mask
+ // mv scratch1, destreg
+ // [sext scratch2 if signed min/max]
+ // ifnochangeneeded scratch2, incr, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
+ .addReg(DestReg)
+ .addImm(0);
+
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Max: {
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::Min: {
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::UMax:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ case AtomicRMWInst::UMin:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+
+ // .loopifbody:
+ // xor scratch1, destreg, incr
+ // and scratch1, scratch1, mask
+ // xor scratch1, destreg, scratch1
+ insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
+ MaskReg, Scratch1Reg);
+
+ // .looptail:
+ // sc.w scratch1, scratch1, (addr)
+ // bnez scratch1, loop
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
+ .addReg(AddrReg)
+ .addReg(Scratch1Reg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(Scratch1Reg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+bool RISCVExpandPseudo::expandAtomicCmpXchg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
+ int Width, MachineBasicBlock::iterator &NextMBBI) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopHeadMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned CmpValReg = MI.getOperand(3).getReg();
+ unsigned NewValReg = MI.getOperand(4).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
+
+ if (!IsMasked) {
+ // .loophead:
+ // lr.w dest, (addr)
+ // bne dest, cmpval, done
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
+ .addReg(DestReg)
+ .addReg(CmpValReg)
+ .addMBB(DoneMBB);
+ // .looptail:
+ // sc.w scratch, newval, (addr)
+ // bnez scratch, loophead
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(NewValReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+ } else {
+ // .loophead:
+ // lr.w dest, (addr)
+ // and scratch, dest, mask
+ // bne scratch, cmpval, done
+ unsigned MaskReg = MI.getOperand(5).getReg();
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(CmpValReg)
+ .addMBB(DoneMBB);
+
+ // .looptail:
+ // xor scratch, dest, newval
+ // and scratch, scratch, mask
+ // xor scratch, dest, scratch
+ //   sc.w scratch, scratch, (addr)
+ // bnez scratch, loophead
+ insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
+ MaskReg, ScratchReg);
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+ }
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo",
+ RISCV_EXPAND_PSEUDO_NAME, false, false)
+namespace llvm {
+
+FunctionPass *createRISCVExpandPseudoPass() { return new RISCVExpandPseudo(); }
+
+} // end of namespace llvm
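The masked LR/SC expansions above (the min/max RMW loop as well as the masked cmpxchg tail) both lean on the three-instruction xor/and/xor merge emitted by insertMaskedMerge. A minimal standalone sketch of that identity, using a hypothetical mergeMasked helper and made-up sample values rather than anything from the patch:

// Illustrative only: the xor/and/xor masked merge replaces just the bits
// selected by Mask inside the aligned word, leaving the rest untouched.
#include <cassert>
#include <cstdint>

static uint32_t mergeMasked(uint32_t Dest, uint32_t NewVal, uint32_t Mask) {
  uint32_t Scratch = Dest ^ NewVal; // xor scratch, dest, newval
  Scratch &= Mask;                  // and scratch, scratch, mask
  return Dest ^ Scratch;            // xor scratch, dest, scratch
}

int main() {
  uint32_t Word = 0xAABBCCDD; // aligned 32-bit word containing the i8 field
  uint32_t Incr = 0x00007700; // new byte value, already shifted into place
  uint32_t Mask = 0x0000FF00; // selects the second byte of the word
  assert(mergeMasked(Word, Incr, Mask) == 0xAABB77DD); // only that byte changed
  return 0;
}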
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index a816028f9d8b..74417899c8da 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -148,8 +148,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
// Skip to before the restores of callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
// callee-saved register.
- MachineBasicBlock::iterator LastFrameDestroy = MBBI;
- std::advance(LastFrameDestroy, -MFI.getCalleeSavedInfo().size());
+ auto LastFrameDestroy = std::prev(MBBI, MFI.getCalleeSavedInfo().size());
uint64_t StackSize = MFI.getStackSize();
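The hunk above only swaps an iterator copy plus std::advance for an equivalent std::prev call. A tiny sketch, using a std::list as a stand-in for the basic block's instruction list, showing the two forms land on the same iterator:

#include <cassert>
#include <iterator>
#include <list>

int main() {
  std::list<int> Restores = {1, 2, 3, 4, 5}; // stand-in for CSR restore insts
  auto End = Restores.end();

  auto ViaAdvance = End;
  std::advance(ViaAdvance, -3);     // old form: copy, then advance by -n
  auto ViaPrev = std::prev(End, 3); // new form: std::prev(it, n)

  assert(ViaAdvance == ViaPrev && *ViaPrev == 3);
  return 0;
}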
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 04441b9a9b15..aa80365feb83 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "RISCV.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCV.h"
#include "RISCVTargetMachine.h"
+#include "Utils/RISCVMatInt.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Debug.h"
@@ -56,20 +57,47 @@ public:
private:
void doPeepholeLoadStoreADDI();
- void doPeepholeBuildPairF64SplitF64();
};
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
doPeepholeLoadStoreADDI();
- doPeepholeBuildPairF64SplitF64();
}
-void RISCVDAGToDAGISel::Select(SDNode *Node) {
- unsigned Opcode = Node->getOpcode();
- MVT XLenVT = Subtarget->getXLenVT();
+static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
+ MVT XLenVT) {
+ RISCVMatInt::InstSeq Seq;
+ RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64, Seq);
+
+ SDNode *Result;
+ SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
+ for (RISCVMatInt::Inst &Inst : Seq) {
+ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
+ if (Inst.Opc == RISCV::LUI)
+ Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
+ else
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
+
+ // Only the first instruction has X0 as its source.
+ SrcReg = SDValue(Result, 0);
+ }
+
+ return Result;
+}
+
+// Returns true if the Node is an ISD::AND with a constant argument. If so,
+// set Mask to that constant value.
+static bool isConstantMask(SDNode *Node, uint64_t &Mask) {
+ if (Node->getOpcode() == ISD::AND &&
+ Node->getOperand(1).getOpcode() == ISD::Constant) {
+ Mask = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ return true;
+ }
+ return false;
+}
- // If we have a custom node, we have already selected
+void RISCVDAGToDAGISel::Select(SDNode *Node) {
+ // If we have a custom node, we have already selected.
if (Node->isMachineOpcode()) {
LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
Node->setNodeId(-1);
@@ -78,27 +106,58 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Instruction Selection not handled by the auto-generated tablegen selection
// should be handled here.
+ unsigned Opcode = Node->getOpcode();
+ MVT XLenVT = Subtarget->getXLenVT();
+ SDLoc DL(Node);
EVT VT = Node->getValueType(0);
- if (Opcode == ISD::Constant && VT == XLenVT) {
- auto *ConstNode = cast<ConstantSDNode>(Node);
- // Materialize zero constants as copies from X0. This allows the coalescer
- // to propagate these into other instructions.
- if (ConstNode->isNullValue()) {
+
+ switch (Opcode) {
+ case ISD::Constant: {
+ auto ConstNode = cast<ConstantSDNode>(Node);
+ if (VT == XLenVT && ConstNode->isNullValue()) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
RISCV::X0, XLenVT);
ReplaceNode(Node, New.getNode());
return;
}
+ int64_t Imm = ConstNode->getSExtValue();
+ if (XLenVT == MVT::i64) {
+ ReplaceNode(Node, selectImm(CurDAG, SDLoc(Node), Imm, XLenVT));
+ return;
+ }
+ break;
}
- if (Opcode == ISD::FrameIndex) {
- SDLoc DL(Node);
+ case ISD::FrameIndex: {
SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
- EVT VT = Node->getValueType(0);
SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
return;
}
+ case ISD::SRL: {
+ if (!Subtarget->is64Bit())
+ break;
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ uint64_t Mask;
+ // Match (srl (and val, mask), imm) where the result would be a
+ // zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
+ // is equivalent to this (SimplifyDemandedBits may have removed lower bits
+ // from the mask that aren't necessary due to the right-shifting).
+ if (Op1.getOpcode() == ISD::Constant &&
+ isConstantMask(Op0.getNode(), Mask)) {
+ uint64_t ShAmt = cast<ConstantSDNode>(Op1.getNode())->getZExtValue();
+
+ if ((Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff) {
+ SDValue ShAmtVal =
+ CurDAG->getTargetConstant(ShAmt, SDLoc(Node), XLenVT);
+ CurDAG->SelectNodeTo(Node, RISCV::SRLIW, XLenVT, Op0.getOperand(0),
+ ShAmtVal);
+ return;
+ }
+ }
+ }
+ }
// Select the default instruction.
SelectCode(Node);
@@ -216,43 +275,6 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
}
}
-// Remove redundant BuildPairF64+SplitF64 pairs. i.e. cases where an f64 is
-// built of two i32 values, only to be split apart again. This must be done
-// here as a peephole optimisation as the DAG has not been fully legalized at
-// the point BuildPairF64/SplitF64 nodes are created in RISCVISelLowering, so
-// some nodes would not yet have been replaced with libcalls.
-void RISCVDAGToDAGISel::doPeepholeBuildPairF64SplitF64() {
- SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
- ++Position;
-
- while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = &*--Position;
- // Skip dead nodes and any nodes other than SplitF64Pseudo.
- if (N->use_empty() || !N->isMachineOpcode() ||
- !(N->getMachineOpcode() == RISCV::SplitF64Pseudo))
- continue;
-
- // If the operand to SplitF64 is a BuildPairF64, the split operation is
- // redundant. Just use the operands to BuildPairF64 as the result.
- SDValue F64Val = N->getOperand(0);
- if (F64Val.isMachineOpcode() &&
- F64Val.getMachineOpcode() == RISCV::BuildPairF64Pseudo) {
- LLVM_DEBUG(
- dbgs() << "Removing redundant SplitF64Pseudo and replacing uses "
- "with BuildPairF64Pseudo operands:\n");
- LLVM_DEBUG(dbgs() << "N: ");
- LLVM_DEBUG(N->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "F64Val: ");
- LLVM_DEBUG(F64Val->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
- SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
- SDValue To[] = {F64Val.getOperand(0), F64Val.getOperand(1)};
- CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
- }
- }
- CurDAG->RemoveDeadNodes();
-}
-
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
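selectImm above hands the real work to RISCVMatInt::generateInstSeq, which lives in Utils/RISCVMatInt and is not part of this hunk. As a rough sketch of the simplest case it has to cover, the classic LUI+ADDI split of a 32-bit constant looks like the following; splitImm32 is a hypothetical helper, and the real generator additionally emits ADDIW/SLLI/ADDI chains for RV64 values:

#include <cassert>
#include <cstdint>

struct LuiAddi {
  uint32_t Hi20; // LUI operand
  int32_t Lo12;  // ADDI operand, a sign-extended 12-bit value
};

// Split Imm so that (Hi20 << 12) + Lo12 == Imm (modulo 2^32). The +0x800
// rounds Hi20 up when the low 12 bits will sign-extend to a negative Lo12.
static LuiAddi splitImm32(int32_t Imm) {
  uint32_t U = static_cast<uint32_t>(Imm);
  uint32_t Hi20 = ((U + 0x800u) >> 12) & 0xFFFFFu;
  int32_t Lo12 = static_cast<int32_t>(U & 0xFFFu);
  if (Lo12 >= 0x800)
    Lo12 -= 0x1000;
  return {Hi20, Lo12};
}

int main() {
  for (int32_t Imm : {0x12345678, -1234567, -2048, 4097, 0x7FFFF800}) {
    LuiAddi S = splitImm32(Imm);
    // Reassemble with modular 32-bit arithmetic, as the hardware would.
    uint32_t Rebuilt = (S.Hi20 << 12) + static_cast<uint32_t>(S.Lo12);
    assert(Rebuilt == static_cast<uint32_t>(Imm));
  }
  return 0;
}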
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 87796e5b1097..508dcbd009ed 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -80,6 +80,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ if (Subtarget.is64Bit()) {
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ }
+
if (!Subtarget.hasStdExtM()) {
setOperationAction(ISD::MUL, XLenVT, Expand);
setOperationAction(ISD::MULHS, XLenVT, Expand);
@@ -111,6 +118,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
ISD::SETGT, ISD::SETGE, ISD::SETNE};
+ ISD::NodeType FPOpToExtend[] = {
+ ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
+
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
@@ -119,6 +129,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ for (auto Op : FPOpToExtend)
+ setOperationAction(Op, MVT::f32, Expand);
}
if (Subtarget.hasStdExtD()) {
@@ -131,16 +143,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ for (auto Op : FPOpToExtend)
+ setOperationAction(Op, MVT::f64, Expand);
}
setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
- if (Subtarget.hasStdExtA())
+ if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
- else
+ setMinCmpXchgSizeInBits(32);
+ } else {
setMaxAtomicSizeInBitsSupported(0);
+ }
setBooleanContents(ZeroOrOneBooleanContent);
@@ -160,6 +176,34 @@ EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
return VT.changeVectorElementTypeToInteger();
}
+bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ default:
+ return false;
+ case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
+ case Intrinsic::riscv_masked_atomicrmw_add_i32:
+ case Intrinsic::riscv_masked_atomicrmw_sub_i32:
+ case Intrinsic::riscv_masked_atomicrmw_nand_i32:
+ case Intrinsic::riscv_masked_atomicrmw_max_i32:
+ case Intrinsic::riscv_masked_atomicrmw_min_i32:
+ case Intrinsic::riscv_masked_atomicrmw_umax_i32:
+ case Intrinsic::riscv_masked_atomicrmw_umin_i32:
+ case Intrinsic::riscv_masked_cmpxchg_i32:
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
+}
+
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
@@ -228,6 +272,10 @@ bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return TargetLowering::isZExtFree(Val, VT2);
}
+bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
+ return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
+}
+
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
@@ -283,9 +331,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VASTART:
return lowerVASTART(Op, DAG);
case ISD::FRAMEADDR:
- return LowerFRAMEADDR(Op, DAG);
+ return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR:
- return LowerRETURNADDR(Op, DAG);
+ return lowerRETURNADDR(Op, DAG);
}
}
@@ -298,7 +346,7 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
int64_t Offset = N->getOffset();
MVT XLenVT = Subtarget.getXLenVT();
- if (isPositionIndependent() || Subtarget.is64Bit())
+ if (isPositionIndependent())
report_fatal_error("Unable to lowerGlobalAddress");
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
@@ -323,7 +371,7 @@ SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
const BlockAddress *BA = N->getBlockAddress();
int64_t Offset = N->getOffset();
- if (isPositionIndependent() || Subtarget.is64Bit())
+ if (isPositionIndependent())
report_fatal_error("Unable to lowerBlockAddress");
SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
@@ -357,26 +405,6 @@ SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
}
}
-SDValue RISCVTargetLowering::lowerExternalSymbol(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- ExternalSymbolSDNode *N = cast<ExternalSymbolSDNode>(Op);
- const char *Sym = N->getSymbol();
-
- // TODO: should also handle gp-relative loads.
-
- if (isPositionIndependent() || Subtarget.is64Bit())
- report_fatal_error("Unable to lowerExternalSymbol");
-
- SDValue GAHi = DAG.getTargetExternalSymbol(Sym, Ty, RISCVII::MO_HI);
- SDValue GALo = DAG.getTargetExternalSymbol(Sym, Ty, RISCVII::MO_LO);
- SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
- SDValue MNLo =
- SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
- return MNLo;
-}
-
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CondV = Op.getOperand(0);
SDValue TrueV = Op.getOperand(1);
@@ -432,7 +460,7 @@ SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV));
}
-SDValue RISCVTargetLowering::LowerFRAMEADDR(SDValue Op,
+SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
@@ -455,7 +483,7 @@ SDValue RISCVTargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
-SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op,
+SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
@@ -472,7 +500,7 @@ SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
int Off = -XLenInBytes;
- SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(Off, DL, VT);
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
@@ -485,6 +513,84 @@ SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
+// Return true if the given node is a shift with a non-constant shift amount.
+static bool isVariableShift(SDValue Val) {
+ switch (Val.getOpcode()) {
+ default:
+ return false;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return Val.getOperand(1).getOpcode() != ISD::Constant;
+ }
+}
+
+// Returns true if the given node is an sdiv, udiv, or urem with non-constant
+// operands.
+static bool isVariableSDivUDivURem(SDValue Val) {
+ switch (Val.getOpcode()) {
+ default:
+ return false;
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::UREM:
+ return Val.getOperand(0).getOpcode() != ISD::Constant &&
+ Val.getOperand(1).getOpcode() != ISD::Constant;
+ }
+}
+
+SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA: {
+ assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only");
+ if (!DCI.isBeforeLegalize())
+ break;
+ SDValue RHS = N->getOperand(1);
+ if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant ||
+ (RHS->getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5))
+ break;
+ SDValue LHS = N->getOperand(0);
+ SDLoc DL(N);
+ SDValue NewRHS =
+ DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5)));
+ return DCI.CombineTo(
+ N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS));
+ }
+ case ISD::ANY_EXTEND: {
+ // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64,
+ // then instead sign-extend in order to increase the chance of being able
+ // to select the sllw/srlw/sraw/divw/divuw/remuw instructions.
+ SDValue Src = N->getOperand(0);
+ if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
+ break;
+ if (!isVariableShift(Src) &&
+ !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
+ break;
+ SDLoc DL(N);
+ return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src));
+ }
+ case RISCVISD::SplitF64: {
+ // If the input to SplitF64 is just BuildPairF64 then the operation is
+ // redundant. Instead, use BuildPairF64's operands directly.
+ SDValue Op0 = N->getOperand(0);
+ if (Op0->getOpcode() != RISCVISD::BuildPairF64)
+ break;
+ return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
@@ -807,10 +913,14 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
if (Reg) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- } else {
- State.addLoc(
- CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+ return false;
}
+
+ if (ValVT == MVT::f32) {
+ LocVT = MVT::f32;
+ LocInfo = CCValAssign::Full;
+ }
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
@@ -859,6 +969,22 @@ void RISCVTargetLowering::analyzeOutputArgs(
}
}
+// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
+// values.
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
+ const CCValAssign &VA, const SDLoc &DL) {
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unexpected CCValAssign::LocInfo");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+ break;
+ }
+ return Val;
+}
+
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
@@ -866,21 +992,29 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
EVT LocVT = VA.getLocVT();
- EVT ValVT = VA.getValVT();
SDValue Val;
unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return Val;
+
+ return convertLocVTToValVT(DAG, Val, VA, DL);
+}
+
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
+ const CCValAssign &VA, const SDLoc &DL) {
+ EVT LocVT = VA.getLocVT();
+
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
- case CCValAssign::Indirect:
break;
case CCValAssign::BCvt:
- Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
break;
}
return Val;
@@ -995,7 +1129,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- assert(VA.getLocVT() == XLenVT && "Unhandled argument type");
SDValue ArgValue;
// Passing f64 on RV32D with a soft float ABI must be handled as a special
// case.
@@ -1282,13 +1415,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Promote the value if needed.
// For now, only handle fully promoted and indirect arguments.
- switch (VA.getLocInfo()) {
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), ArgValue);
- break;
- case CCValAssign::Indirect: {
+ if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
@@ -1310,10 +1437,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
++i;
}
ArgValue = SpillSlot;
- break;
- }
- default:
- llvm_unreachable("Unknown loc info!");
+ } else {
+ ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
}
// Use local copy if it is a byval arg.
@@ -1415,6 +1540,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Glue the RetValue to the end of the call sequence
Chain = RetValue.getValue(1);
Glue = RetValue.getValue(2);
+
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
SDValue RetValue2 =
@@ -1425,15 +1551,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
RetValue2);
}
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- RetValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), RetValue);
- break;
- }
+ RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
InVals.push_back(RetValue);
}
@@ -1456,22 +1574,6 @@ bool RISCVTargetLowering::CanLowerReturn(
return true;
}
-static SDValue packIntoRegLoc(SelectionDAG &DAG, SDValue Val,
- const CCValAssign &VA, const SDLoc &DL) {
- EVT LocVT = VA.getLocVT();
-
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unexpected CCValAssign::LocInfo");
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
- break;
- }
- return Val;
-}
-
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
@@ -1514,7 +1616,7 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
} else {
// Handle a 'normal' return.
- Val = packIntoRegLoc(DAG, Val, VA, DL);
+ Val = convertValVTToLocVT(DAG, Val, VA, DL);
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
// Guarantee that all emitted copies are stuck together.
@@ -1616,3 +1718,83 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
return Builder.CreateFence(AtomicOrdering::Acquire);
return nullptr;
}
+
+TargetLowering::AtomicExpansionKind
+RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ if (Size == 8 || Size == 16)
+ return AtomicExpansionKind::MaskedIntrinsic;
+ return AtomicExpansionKind::None;
+}
+
+static Intrinsic::ID
+getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
+ case AtomicRMWInst::Add:
+ return Intrinsic::riscv_masked_atomicrmw_add_i32;
+ case AtomicRMWInst::Sub:
+ return Intrinsic::riscv_masked_atomicrmw_sub_i32;
+ case AtomicRMWInst::Nand:
+ return Intrinsic::riscv_masked_atomicrmw_nand_i32;
+ case AtomicRMWInst::Max:
+ return Intrinsic::riscv_masked_atomicrmw_max_i32;
+ case AtomicRMWInst::Min:
+ return Intrinsic::riscv_masked_atomicrmw_min_i32;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::riscv_masked_atomicrmw_umax_i32;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+ }
+}
+
+Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
+ IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+ Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
+ Type *Tys[] = {AlignedAddr->getType()};
+ Function *LrwOpScwLoop = Intrinsic::getDeclaration(
+ AI->getModule(),
+ getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
+
+ // Must pass the shift amount needed to sign extend the loaded value prior
+ // to performing a signed comparison for min/max. ShiftAmt is the number of
+ // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
+ // is the number of bits to left+right shift the value in order to
+ // sign-extend.
+ if (AI->getOperation() == AtomicRMWInst::Min ||
+ AI->getOperation() == AtomicRMWInst::Max) {
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ unsigned ValWidth =
+ DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
+ Value *SextShamt = Builder.CreateSub(
+ Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
+ return Builder.CreateCall(LrwOpScwLoop,
+ {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+ }
+
+ return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+}
+
+TargetLowering::AtomicExpansionKind
+RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *CI) const {
+ unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
+ if (Size == 8 || Size == 16)
+ return AtomicExpansionKind::MaskedIntrinsic;
+ return AtomicExpansionKind::None;
+}
+
+Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
+ IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
+ Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+ Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord));
+ Type *Tys[] = {AlignedAddr->getType()};
+ Function *MaskedCmpXchg = Intrinsic::getDeclaration(
+ CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys);
+ return Builder.CreateCall(MaskedCmpXchg,
+ {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+}
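emitMaskedAtomicRMWIntrinsic above passes XLen-ShiftAmt-ValWidth so that the later expansion can sign-extend the extracted field in place before the signed min/max comparison. A small standalone illustration of that arithmetic, using XLen = 32 only so it fits in a host int32_t (the field width, offset and values are made up, not taken from the patch):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned XLen = 32, ValWidth = 8, ShiftAmt = 8;
  const unsigned SextShamt = XLen - ShiftAmt - ValWidth; // 16

  // Aligned word whose byte at bit offset 8 holds 0x80, i.e. -128 as an i8.
  uint32_t Masked = 0x00008000u; // this is "dest & mask" from the expansion

  // The slli + srai pair emitted by insertSext(): shift the field's sign bit
  // up to the MSB, then arithmetic-shift back down.
  int32_t Shifted = static_cast<int32_t>(Masked << SextShamt);
  int32_t Sext = Shifted >> SextShamt;

  // The field is now sign-extended but still sits at bit offset ShiftAmt,
  // matching the (also shifted-into-place) increment it is compared against.
  assert(Sext == -32768);             // 0xFFFF8000
  assert((Sext >> ShiftAmt) == -128); // recovering the i8 value
  return 0;
}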
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 280adb29fd02..6970900bb062 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -43,6 +43,9 @@ public:
explicit RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI);
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I = nullptr) const override;
@@ -51,10 +54,13 @@ public:
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
@@ -107,15 +113,27 @@ private:
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
bool IsEligibleForTailCallOptimization(CCState &CCInfo,
CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const;
+
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ virtual Value *emitMaskedAtomicRMWIntrinsic(
+ IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override;
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
+ virtual Value *
+ emitMaskedAtomicCmpXchgIntrinsic(IRBuilder<> &Builder, AtomicCmpXchgInst *CI,
+ Value *AlignedAddr, Value *CmpVal,
+ Value *NewVal, Value *Mask,
+ AtomicOrdering Ord) const override;
};
}
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 529e048045c6..ebd676a6056e 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -45,11 +45,12 @@ def InstFormatCSS : InstFormat<10>;
def InstFormatCIW : InstFormat<11>;
def InstFormatCL : InstFormat<12>;
def InstFormatCS : InstFormat<13>;
-def InstFormatCB : InstFormat<14>;
-def InstFormatCJ : InstFormat<15>;
-def InstFormatOther : InstFormat<16>;
+def InstFormatCA : InstFormat<14>;
+def InstFormatCB : InstFormat<15>;
+def InstFormatCJ : InstFormat<16>;
+def InstFormatOther : InstFormat<17>;
-// The following opcode names and match those given in Table 19.1 in the
+// The following opcode names match those given in Table 19.1 in the
// RISC-V User-level ISA specification ("RISC-V base opcode map").
class RISCVOpcode<bits<7> val> {
bits<7> Value = val;
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
index 6abcbd7cc8a1..bda8bbb558eb 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -118,6 +118,19 @@ class RVInst16CS<bits<3> funct3, bits<2> opcode, dag outs, dag ins,
let Inst{1-0} = opcode;
}
+class RVInst16CA<bits<6> funct6, bits<2> funct2, bits<2> opcode, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCA> {
+ bits<3> rs2;
+ bits<3> rs1;
+
+ let Inst{15-10} = funct6;
+ let Inst{9-7} = rs1;
+ let Inst{6-5} = funct2;
+ let Inst{4-2} = rs2;
+ let Inst{1-0} = opcode;
+}
+
class RVInst16CB<bits<3> funct3, bits<2> opcode, dag outs, dag ins,
string opcodestr, string argstr>
: RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCB> {
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 327e4a7d615f..76c74368ca11 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -362,9 +362,8 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
- const auto &STI = MF->getSubtarget<RISCVSubtarget>();
- if (TM.isPositionIndependent() || STI.is64Bit())
+ if (TM.isPositionIndependent())
report_fatal_error("Unable to insert indirect branch");
if (!isInt<32>(BrOffset))
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index b51e4e70330d..d7cc13d4fabd 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -125,11 +125,6 @@ def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
}];
}
-def uimm12 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<12>;
- let DecoderMethod = "decodeUImmOperand<12>";
-}
-
// A 13-bit signed immediate where the least significant bit is zero.
def simm13_lsb0 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<13, "Lsb0">;
@@ -143,8 +138,7 @@ def simm13_lsb0 : Operand<OtherVT> {
}];
}
-def uimm20 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<20>;
+class UImm20Operand : Operand<XLenVT> {
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<20>";
let MCOperandPredicate = [{
@@ -155,9 +149,20 @@ def uimm20 : Operand<XLenVT> {
}];
}
+def uimm20_lui : UImm20Operand {
+ let ParserMatchClass = UImmAsmOperand<20, "LUI">;
+}
+def uimm20_auipc : UImm20Operand {
+ let ParserMatchClass = UImmAsmOperand<20, "AUIPC">;
+}
+
+def Simm21Lsb0JALAsmOperand : SImmAsmOperand<21, "Lsb0JAL"> {
+ let ParserMethod = "parseJALOffset";
+}
+
// A 21-bit signed immediate where the least significant bit is zero.
-def simm21_lsb0 : Operand<OtherVT> {
- let ParserMatchClass = SImmAsmOperand<21, "Lsb0">;
+def simm21_lsb0_jal : Operand<OtherVT> {
+ let ParserMatchClass = Simm21Lsb0JALAsmOperand;
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<21>";
let MCOperandPredicate = [{
@@ -172,24 +177,42 @@ def BareSymbol : AsmOperandClass {
let Name = "BareSymbol";
let RenderMethod = "addImmOperands";
let DiagnosticType = "InvalidBareSymbol";
+ let ParserMethod = "parseBareSymbol";
}
// A bare symbol.
def bare_symbol : Operand<XLenVT> {
let ParserMatchClass = BareSymbol;
- let MCOperandPredicate = [{
- return MCOp.isBareSymbolRef();
- }];
+}
+
+def CSRSystemRegister : AsmOperandClass {
+ let Name = "CSRSystemRegister";
+ let ParserMethod = "parseCSRSystemRegister";
+ let DiagnosticType = "InvalidCSRSystemRegister";
+}
+
+def csr_sysreg : Operand<XLenVT> {
+ let ParserMatchClass = CSRSystemRegister;
+ let PrintMethod = "printCSRSystemRegister";
+ let DecoderMethod = "decodeUImmOperand<12>";
}
// A parameterized register class alternative to i32imm/i64imm from Target.td.
-def ixlenimm : Operand<XLenVT> {
- let ParserMatchClass = ImmXLenAsmOperand<"">;
+def ixlenimm : Operand<XLenVT>;
+
+def ixlenimm_li : Operand<XLenVT> {
+ let ParserMatchClass = ImmXLenAsmOperand<"", "LI">;
}
// Standalone (codegen-only) immleaf patterns.
def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
def simm32hi20 : ImmLeaf<XLenVT, [{return isShiftedInt<20, 12>(Imm);}]>;
+// A mask value that won't affect significant shift bits.
+def immbottomxlenset : ImmLeaf<XLenVT, [{
+ if (Subtarget->is64Bit())
+ return countTrailingOnes<uint64_t>(Imm) >= 6;
+ return countTrailingOnes<uint64_t>(Imm) >= 5;
+}]>;
// Addressing modes.
// Necessary because a frameindex can't be matched directly in a pattern.
@@ -255,13 +278,13 @@ class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class CSR_ir<bits<3> funct3, string opcodestr>
- : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins uimm12:$imm12, GPR:$rs1),
+ : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins csr_sysreg:$imm12, GPR:$rs1),
opcodestr, "$rd, $imm12, $rs1">;
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class CSR_ii<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd),
- (ins uimm12:$imm12, uimm5:$rs1),
+ (ins csr_sysreg:$imm12, uimm5:$rs1),
opcodestr, "$rd, $imm12, $rs1">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -285,14 +308,14 @@ class Priv<string opcodestr, bits<7> funct7>
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, isReMaterializable = 1, mayLoad = 0, mayStore = 0 in {
-def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20:$imm20),
+def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20_lui:$imm20),
"lui", "$rd, $imm20">;
-def AUIPC : RVInstU<OPC_AUIPC, (outs GPR:$rd), (ins uimm20:$imm20),
+def AUIPC : RVInstU<OPC_AUIPC, (outs GPR:$rd), (ins uimm20_auipc:$imm20),
"auipc", "$rd, $imm20">;
let isCall = 1 in
-def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0:$imm20),
+def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0_jal:$imm20),
"jal", "$rd, $imm20">;
let isCall = 1 in
@@ -379,6 +402,15 @@ def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> {
let rd = 0;
let imm12 = 1;
}
+
+// This is a de facto standard (as set by GNU binutils) 32-bit unimplemented
+// instruction (i.e., it should always trap, if your implementation has invalid
+// instruction traps).
+def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", ""> {
+ let rs1 = 0;
+ let rd = 0;
+ let imm12 = 0b110000000000;
+}
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
def CSRRW : CSR_ir<0b001, "csrrw">;
@@ -467,7 +499,7 @@ def : InstAlias<"nop", (ADDI X0, X0, 0)>;
// expanded to real instructions immediately.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 32,
isCodeGenOnly = 0, isAsmParserOnly = 1 in
-def PseudoLI : Pseudo<(outs GPR:$rd), (ins ixlenimm:$imm), [],
+def PseudoLI : Pseudo<(outs GPR:$rd), (ins ixlenimm_li:$imm), [],
"li", "$rd, $imm">;
def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>;
@@ -516,8 +548,8 @@ def : InstAlias<"bleu $rs, $rt, $offset",
(BGEU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>;
// "ret" has more weight since "ret" and "jr" alias the same "jalr" instruction.
-def : InstAlias<"j $offset", (JAL X0, simm21_lsb0:$offset)>;
-def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0:$offset)>;
+def : InstAlias<"j $offset", (JAL X0, simm21_lsb0_jal:$offset)>;
+def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0_jal:$offset)>;
def : InstAlias<"jr $rs", (JALR X0, GPR:$rs, 0)>;
def : InstAlias<"jalr $rs", (JALR X1, GPR:$rs, 0)>;
def : InstAlias<"ret", (JALR X0, X1, 0), 2>;
@@ -538,18 +570,67 @@ def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, 0xC80, X0)>;
def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, 0xC81, X0)>;
} // Predicates = [IsRV32]
-def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, uimm12:$csr, X0)>;
-def : InstAlias<"csrw $csr, $rs", (CSRRW X0, uimm12:$csr, GPR:$rs)>;
-def : InstAlias<"csrs $csr, $rs", (CSRRS X0, uimm12:$csr, GPR:$rs)>;
-def : InstAlias<"csrc $csr, $rs", (CSRRC X0, uimm12:$csr, GPR:$rs)>;
+def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, csr_sysreg:$csr, X0)>;
+def : InstAlias<"csrw $csr, $rs", (CSRRW X0, csr_sysreg:$csr, GPR:$rs)>;
+def : InstAlias<"csrs $csr, $rs", (CSRRS X0, csr_sysreg:$csr, GPR:$rs)>;
+def : InstAlias<"csrc $csr, $rs", (CSRRC X0, csr_sysreg:$csr, GPR:$rs)>;
-def : InstAlias<"csrwi $csr, $imm", (CSRRWI X0, uimm12:$csr, uimm5:$imm)>;
-def : InstAlias<"csrsi $csr, $imm", (CSRRSI X0, uimm12:$csr, uimm5:$imm)>;
-def : InstAlias<"csrci $csr, $imm", (CSRRCI X0, uimm12:$csr, uimm5:$imm)>;
+def : InstAlias<"csrwi $csr, $imm", (CSRRWI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrsi $csr, $imm", (CSRRSI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrci $csr, $imm", (CSRRCI X0, csr_sysreg:$csr, uimm5:$imm)>;
+
+let EmitPriority = 0 in {
+def : InstAlias<"csrw $csr, $imm", (CSRRWI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrs $csr, $imm", (CSRRSI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrc $csr, $imm", (CSRRCI X0, csr_sysreg:$csr, uimm5:$imm)>;
+
+def : InstAlias<"csrrw $rd, $csr, $imm", (CSRRWI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrrs $rd, $csr, $imm", (CSRRSI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrrc $rd, $csr, $imm", (CSRRCI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>;
+}
def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0)>;
def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>;
+let EmitPriority = 0 in {
+def : InstAlias<"add $rd, $rs1, $imm12",
+ (ADDI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"and $rd, $rs1, $imm12",
+ (ANDI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"xor $rd, $rs1, $imm12",
+ (XORI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"or $rd, $rs1, $imm12",
+ (ORI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"sll $rd, $rs1, $shamt",
+ (SLLI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : InstAlias<"srl $rd, $rs1, $shamt",
+ (SRLI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : InstAlias<"sra $rd, $rs1, $shamt",
+ (SRAI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
+let Predicates = [IsRV64] in {
+def : InstAlias<"addw $rd, $rs1, $imm12",
+ (ADDIW GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"sllw $rd, $rs1, $shamt",
+ (SLLIW GPR:$rd, GPR:$rs1, uimm5:$shamt)>;
+def : InstAlias<"srlw $rd, $rs1, $shamt",
+ (SRLIW GPR:$rd, GPR:$rs1, uimm5:$shamt)>;
+def : InstAlias<"sraw $rd, $rs1, $shamt",
+ (SRAIW GPR:$rd, GPR:$rs1, uimm5:$shamt)>;
+} // Predicates = [IsRV64]
+def : InstAlias<"slt $rd, $rs1, $imm12",
+ (SLTI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"sltu $rd, $rs1, $imm12",
+ (SLTIU GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+}
+
+def : MnemonicAlias<"move", "mv">;
+
+// The SCALL and SBREAK instructions were renamed to ECALL and EBREAK in
+// version 2.1 of the user-level ISA. Like the GNU toolchain, we still accept
+// the old name for backwards compatibility.
+def : MnemonicAlias<"scall", "ecall">;
+def : MnemonicAlias<"sbreak", "ebreak">;
+
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//
@@ -560,7 +641,7 @@ def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>;
/// Generic pattern classes
-class PatGprGpr<SDPatternOperator OpNode, RVInstR Inst>
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst>
: Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>;
class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
: Pat<(OpNode GPR:$rs1, simm12:$imm12), (Inst GPR:$rs1, simm12:$imm12)>;
@@ -573,12 +654,37 @@ class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIShift Inst>
def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
return isOrEquivalentToAdd(N);
}]>;
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+}]>;
+def sexti32 : PatFrags<(ops node:$src),
+ [(sext_inreg node:$src, i32),
+ (assertsexti32 node:$src)]>;
+def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+}]>;
+def assertzexti5 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits() <= 5;
+}]>;
+def zexti32 : PatFrags<(ops node:$src),
+ [(and node:$src, 0xffffffff),
+ (assertzexti32 node:$src)]>;
+// Defines a legal mask for (assertzexti5 (and src, mask)) to be combinable
+// with a shiftw operation. The mask mustn't modify the lower 5 bits or the
+// upper 32 bits.
+def shiftwamt_mask : ImmLeaf<XLenVT, [{
+ return countTrailingOnes<uint64_t>(Imm) >= 5 && isUInt<32>(Imm);
+}]>;
+def shiftwamt : PatFrags<(ops node:$src),
+ [(assertzexti5 (and node:$src, shiftwamt_mask)),
+ (assertzexti5 node:$src)]>;
/// Immediates
def : Pat<(simm12:$imm), (ADDI X0, simm12:$imm)>;
def : Pat<(simm32hi20:$imm), (LUI (HI20 imm:$imm))>;
-def : Pat<(simm32:$imm), (ADDI (LUI (HI20 imm:$imm)), (LO12Sext imm:$imm))>;
+def : Pat<(simm32:$imm), (ADDI (LUI (HI20 imm:$imm)), (LO12Sext imm:$imm))>,
+ Requires<[IsRV32]>;
/// Simple arithmetic operations
@@ -591,13 +697,23 @@ def : PatGprGpr<and, AND>;
def : PatGprSimm12<and, ANDI>;
def : PatGprGpr<xor, XOR>;
def : PatGprSimm12<xor, XORI>;
-def : PatGprGpr<shl, SLL>;
def : PatGprUimmLog2XLen<shl, SLLI>;
-def : PatGprGpr<srl, SRL>;
def : PatGprUimmLog2XLen<srl, SRLI>;
-def : PatGprGpr<sra, SRA>;
def : PatGprUimmLog2XLen<sra, SRAI>;
+// Match both a plain shift and one where the shift amount is masked (this is
+// typically introduced when the legalizer promotes the shift amount and
+// zero-extends it). For RISC-V, the mask is unnecessary as shifts in the base
+// ISA only read the least significant 5 bits (RV32I) or 6 bits (RV64I).
+class shiftop<SDPatternOperator operator>
+ : PatFrags<(ops node:$val, node:$count),
+ [(operator node:$val, node:$count),
+ (operator node:$val, (and node:$count, immbottomxlenset))]>;
+
+def : PatGprGpr<shiftop<shl>, SLL>;
+def : PatGprGpr<shiftop<srl>, SRL>;
+def : PatGprGpr<shiftop<sra>, SRA>;
+
/// FrameIndex calculations
def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12),
@@ -614,7 +730,9 @@ def : PatGprSimm12<setult, SLTIU>;
// Define pattern expansions for setcc operations that aren't directly
// handled by a RISC-V instruction.
+def : Pat<(seteq GPR:$rs1, 0), (SLTIU GPR:$rs1, 1)>;
def : Pat<(seteq GPR:$rs1, GPR:$rs2), (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(setne GPR:$rs1, 0), (SLTU X0, GPR:$rs1)>;
def : Pat<(setne GPR:$rs1, GPR:$rs2), (SLTU X0, (XOR GPR:$rs1, GPR:$rs2))>;
def : Pat<(setugt GPR:$rs1, GPR:$rs2), (SLTU GPR:$rs2, GPR:$rs1)>;
def : Pat<(setuge GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;
@@ -638,7 +756,7 @@ def Select_GPR_Using_CC_GPR : SelectCC_rrirr<GPR, GPR>;
// Match `(brcond (CondOp ..), ..)` and lower to the appropriate RISC-V branch
// instruction.
class BccPat<PatFrag CondOp, RVInstB Inst>
- : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
+ : Pat<(brcond (XLenVT (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
(Inst GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12)>;
def : BccPat<seteq, BEQ>;
@@ -649,7 +767,7 @@ def : BccPat<setult, BLTU>;
def : BccPat<setuge, BGEU>;
class BccSwapPat<PatFrag CondOp, RVInst InstBcc>
- : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
+ : Pat<(brcond (XLenVT (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
(InstBcc GPR:$rs2, GPR:$rs1, bb:$imm12)>;
// Condition codes that don't have matching RISC-V branch instructions, but
@@ -664,8 +782,8 @@ def : BccSwapPat<setule, BGEU>;
def : Pat<(brcond GPR:$cond, bb:$imm12), (BNE GPR:$cond, X0, bb:$imm12)>;
let isBarrier = 1, isBranch = 1, isTerminator = 1 in
-def PseudoBR : Pseudo<(outs), (ins simm21_lsb0:$imm20), [(br bb:$imm20)]>,
- PseudoInstExpansion<(JAL X0, simm21_lsb0:$imm20)>;
+def PseudoBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>,
+ PseudoInstExpansion<(JAL X0, simm21_lsb0_jal:$imm20)>;
let isCall = 1, Defs=[X1] in
let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in
@@ -720,6 +838,11 @@ def : Pat<(Tail (iPTR tglobaladdr:$dst)),
def : Pat<(Tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
+ isAsmParserOnly = 1 in
+def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+ "lla", "$dst, $src">;
+
/// Loads
multiclass LdPat<PatFrag LoadOp, RVInst Inst> {
@@ -737,7 +860,7 @@ defm : LdPat<sextloadi8, LB>;
defm : LdPat<extloadi8, LB>;
defm : LdPat<sextloadi16, LH>;
defm : LdPat<extloadi16, LH>;
-defm : LdPat<load, LW>;
+defm : LdPat<load, LW>, Requires<[IsRV32]>;
defm : LdPat<zextloadi8, LBU>;
defm : LdPat<zextloadi16, LHU>;
@@ -756,7 +879,7 @@ multiclass StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy> {
defm : StPat<truncstorei8, SB, GPR>;
defm : StPat<truncstorei16, SH, GPR>;
-defm : StPat<store, SW, GPR>;
+defm : StPat<store, SW, GPR>, Requires<[IsRV32]>;
/// Fences
@@ -764,13 +887,13 @@ defm : StPat<store, SW, GPR>;
// Manual: Volume I.
// fence acquire -> fence r, rw
-def : Pat<(atomic_fence (i32 4), (imm)), (FENCE 0b10, 0b11)>;
+def : Pat<(atomic_fence (XLenVT 4), (imm)), (FENCE 0b10, 0b11)>;
// fence release -> fence rw, w
-def : Pat<(atomic_fence (i32 5), (imm)), (FENCE 0b11, 0b1)>;
+def : Pat<(atomic_fence (XLenVT 5), (imm)), (FENCE 0b11, 0b1)>;
// fence acq_rel -> fence.tso
-def : Pat<(atomic_fence (i32 6), (imm)), (FENCE_TSO)>;
+def : Pat<(atomic_fence (XLenVT 6), (imm)), (FENCE_TSO)>;
// fence seq_cst -> fence rw, rw
-def : Pat<(atomic_fence (i32 7), (imm)), (FENCE 0b11, 0b11)>;
+def : Pat<(atomic_fence (XLenVT 7), (imm)), (FENCE 0b11, 0b11)>;
// Lowering for atomic load and store is defined in RISCVInstrInfoA.td.
// Although these are lowered to fence+load/store instructions defined in the
@@ -788,6 +911,66 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
[(CallSeqEnd timm:$amt1, timm:$amt2)]>;
} // Defs = [X2], Uses = [X2]
+/// RV64 patterns
+
+let Predicates = [IsRV64] in {
+
+/// sext and zext
+
+def : Pat<(sext_inreg GPR:$rs1, i32), (ADDIW GPR:$rs1, 0)>;
+def : Pat<(and GPR:$rs1, 0xffffffff), (SRLI (SLLI GPR:$rs1, 32), 32)>;
+
+/// ALU operations
+
+def : Pat<(sext_inreg (add GPR:$rs1, GPR:$rs2), i32),
+ (ADDW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (add GPR:$rs1, simm12:$imm12), i32),
+ (ADDIW GPR:$rs1, simm12:$imm12)>;
+def : Pat<(sext_inreg (sub GPR:$rs1, GPR:$rs2), i32),
+ (SUBW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32),
+ (SLLIW GPR:$rs1, uimm5:$shamt)>;
+// (srl (zexti32 ...), uimm5:$shamt) is matched with custom code due to the
+// need to undo manipulation of the mask value performed by DAGCombine.
+def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
+ (SRAIW GPR:$rs1, uimm5:$shamt)>;
+
+// For variable-length shifts, we rely on assertzexti5 being inserted during
+// lowering (see RISCVTargetLowering::PerformDAGCombine). This enables us to
+// guarantee that selecting a 32-bit variable shift is legal (as the variable
+// shift is known to be <= 32). We must also be careful not to create
+// semantically incorrect patterns. For instance, selecting SRLW for
+// (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
+// is not guaranteed to be safe, as we don't know whether the upper 32-bits of
+// the result are used or not (in the case where rs2=0, this is a
+// sign-extension operation).
+
+def : Pat<(sext_inreg (shl GPR:$rs1, (shiftwamt GPR:$rs2)), i32),
+ (SLLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (shl GPR:$rs1, (shiftwamt GPR:$rs2))),
+ (SRLI (SLLI (SLLW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : Pat<(sext_inreg (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)), i32),
+ (SRLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2))),
+ (SRLI (SLLI (SRLW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : Pat<(sra (sexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
+ (SRAW GPR:$rs1, GPR:$rs2)>;
+
+/// Loads
+
+defm : LdPat<sextloadi32, LW>;
+defm : LdPat<extloadi32, LW>;
+defm : LdPat<zextloadi32, LWU>;
+defm : LdPat<load, LD>;
+
+/// Stores
+
+defm : StPat<truncstorei32, SW, GPR>;
+defm : StPat<store, SD, GPR>;
+} // Predicates = [IsRV64]
+
//===----------------------------------------------------------------------===//
// Standard extensions
//===----------------------------------------------------------------------===//
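The new seteq/setne patterns in the hunk above lower equality tests to SLTIU/SLTU over an XOR, or over the operand itself when comparing against zero. A quick standalone check of those identities with arbitrary sample values:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 7u, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t A : Vals) {
    for (uint32_t B : Vals) {
      bool Eq = ((A ^ B) < 1u); // seteq a, b -> SLTIU (XOR a, b), 1
      bool Ne = (0u < (A ^ B)); // setne a, b -> SLTU x0, (XOR a, b)
      assert(Eq == (A == B));
      assert(Ne == (A != B));
    }
    assert((A < 1u) == (A == 0)); // seteq a, 0 -> SLTIU a, 1
    assert((0u < A) == (A != 0)); // setne a, 0 -> SLTU x0, a
  }
  return 0;
}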
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 379322060438..9cb1d2f0b627 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -44,6 +44,17 @@ multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
def _AQ_RL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">;
}
+multiclass AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy> {
+ def : Pat<(StoreOp GPR:$rs1, StTy:$rs2), (Inst StTy:$rs2, GPR:$rs1, 0)>;
+ def : Pat<(StoreOp AddrFI:$rs1, StTy:$rs2), (Inst StTy:$rs2, AddrFI:$rs1, 0)>;
+ def : Pat<(StoreOp (add GPR:$rs1, simm12:$imm12), StTy:$rs2),
+ (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
+ def : Pat<(StoreOp (add AddrFI:$rs1, simm12:$imm12), StTy:$rs2),
+ (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>;
+ def : Pat<(StoreOp (IsOrAdd AddrFI:$rs1, simm12:$imm12), StTy:$rs2),
+ (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -91,7 +102,177 @@ defm : LdPat<atomic_load_8, LB>;
defm : LdPat<atomic_load_16, LH>;
defm : LdPat<atomic_load_32, LW>;
-defm : StPat<atomic_store_8, SB, GPR>;
-defm : StPat<atomic_store_16, SH, GPR>;
-defm : StPat<atomic_store_32, SW, GPR>;
-} // Predicates = [HasStdExtF]
+defm : AtomicStPat<atomic_store_8, SB, GPR>;
+defm : AtomicStPat<atomic_store_16, SH, GPR>;
+defm : AtomicStPat<atomic_store_32, SW, GPR>;
+
+/// AMOs
+
+multiclass AMOPat<string AtomicOp, string BaseInst> {
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst)>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst#"_AQ")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst#"_RL")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst#"_AQ_RL")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst#"_AQ_RL")>;
+}
+
+defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">;
+defm : AMOPat<"atomic_load_add_32", "AMOADD_W">;
+defm : AMOPat<"atomic_load_and_32", "AMOAND_W">;
+defm : AMOPat<"atomic_load_or_32", "AMOOR_W">;
+defm : AMOPat<"atomic_load_xor_32", "AMOXOR_W">;
+defm : AMOPat<"atomic_load_max_32", "AMOMAX_W">;
+defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
+defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
+defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
+
+def : Pat<(atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr),
+ (AMOADD_W GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_acquire GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_release GPR:$addr, GPR:$incr),
+ (AMOADD_W_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+
+/// Pseudo AMOs
+
+class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$incr, ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def PseudoAtomicLoadNand32 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(atomic_load_nand_32_acquire GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(atomic_load_nand_32_release GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
+
+class PseudoMaskedAMO
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOMinMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$sextshamt,
+ ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOUMinUMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering),
+ (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;
+
+class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ imm:$ordering),
+ (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ imm:$ordering)>;
+
+def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
+ PseudoMaskedAtomicSwap32>;
+def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32,
+ PseudoMaskedAtomicLoadAdd32>;
+def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32,
+ PseudoMaskedAtomicLoadSub32>;
+def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32,
+ PseudoMaskedAtomicLoadNand32>;
+def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32,
+ PseudoMaskedAtomicLoadMax32>;
+def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32,
+ PseudoMaskedAtomicLoadMin32>;
+def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32,
+ PseudoMaskedAtomicLoadUMax32>;
+def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
+ PseudoMaskedAtomicLoadUMin32>;
+
+/// Compare and exchange
+
+class PseudoCmpXchg
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, i32imm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst> {
+ def : Pat<(!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+ def : Pat<(!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+ def : Pat<(!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+ def : Pat<(!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+ def : Pat<(!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}
+
+def PseudoCmpXchg32 : PseudoCmpXchg;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
+
+def PseudoMaskedCmpXchg32
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
+ i32imm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def : Pat<(int_riscv_masked_cmpxchg_i32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
+ (PseudoMaskedCmpXchg32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
+
+} // Predicates = [HasStdExtA]
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 5d1c62c0b653..ad68b5a7dc97 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -167,7 +167,7 @@ def uimm10_lsb00nonzero : Operand<XLenVT>,
[{return isShiftedUInt<8, 2>(Imm) && (Imm != 0);}]> {
let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">;
let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<10>";
+ let DecoderMethod = "decodeUImmNonZeroOperand<10>";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -182,12 +182,12 @@ def simm10_lsb0000nonzero : Operand<XLenVT>,
[{return (Imm != 0) && isShiftedInt<6, 4>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">;
let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmOperand<10>";
+ let DecoderMethod = "decodeSImmNonZeroOperand<10>";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
return false;
- return isShiftedInt<6, 4>(Imm);
+ return isShiftedInt<6, 4>(Imm) && (Imm != 0);
}];
}
@@ -258,16 +258,13 @@ class Shift_right<bits<2> funct2, string OpcodeStr, RegisterClass cls,
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class CS_ALU<bits<2> funct2, string OpcodeStr, RegisterClass cls,
- bit RV64only>
- : RVInst16CS<0b100, 0b01, (outs cls:$rd_wb), (ins cls:$rd, cls:$rs2),
+class CS_ALU<bits<6> funct6, bits<2> funct2, string OpcodeStr,
+ RegisterClass cls>
+ : RVInst16CA<funct6, funct2, 0b01, (outs cls:$rd_wb), (ins cls:$rd, cls:$rs2),
OpcodeStr, "$rd, $rs2"> {
bits<3> rd;
let Constraints = "$rd = $rd_wb";
- let Inst{12} = RV64only;
- let Inst{11-10} = 0b11;
let Inst{9-7} = rd;
- let Inst{6-5} = funct2;
}
//===----------------------------------------------------------------------===//
@@ -411,14 +408,14 @@ def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rs1_wb), (ins GPRC:$rs1, simm6:
let Inst{6-2} = imm{4-0};
}
-def C_SUB : CS_ALU<0b00, "c.sub", GPRC, 0>;
-def C_XOR : CS_ALU<0b01, "c.xor", GPRC, 0>;
-def C_OR : CS_ALU<0b10, "c.or" , GPRC, 0>;
-def C_AND : CS_ALU<0b11, "c.and", GPRC, 0>;
+def C_SUB : CS_ALU<0b100011, 0b00, "c.sub", GPRC>;
+def C_XOR : CS_ALU<0b100011, 0b01, "c.xor", GPRC>;
+def C_OR : CS_ALU<0b100011, 0b10, "c.or" , GPRC>;
+def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>;
let Predicates = [HasStdExtC, IsRV64] in {
-def C_SUBW : CS_ALU<0b00, "c.subw", GPRC, 1>;
-def C_ADDW : CS_ALU<0b01, "c.addw", GPRC, 1>;
+def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>;
+def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -478,7 +475,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2),
"c.mv", "$rs1, $rs2">;
-let rs1 = 0, rs2 = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let rs1 = 0, rs2 = 0, hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
@@ -517,6 +514,13 @@ def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> {
let Inst{9-7} = imm{8-6};
}
+// The all zeros pattern isn't a valid RISC-V instruction. It's used by GNU
+// binutils as a 16-bit instruction known to be unimplemented (i.e., trapping).
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> {
+ let Inst{15-0} = 0;
+}
+
} // Predicates = [HasStdExtC]
//===----------------------------------------------------------------------===//
@@ -625,6 +629,8 @@ def : CompressPat<(AND GPRC:$rs1, GPRC:$rs2, GPRC:$rs1),
} // Predicates = [HasStdExtC]
let Predicates = [HasStdExtC, IsRV64] in {
+def : CompressPat<(ADDIW GPRNoX0:$rd, X0, simm6:$imm),
+ (C_LI GPRNoX0:$rd, simm6:$imm)>;
def : CompressPat<(SUBW GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
(C_SUBW GPRC:$rs1, GPRC:$rs2)>;
def : CompressPat<(ADDW GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
@@ -678,6 +684,7 @@ def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, X0),
def : CompressPat<(ADDI GPRNoX0:$rs1, GPRNoX0:$rs2, 0),
(C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>;
def : CompressPat<(EBREAK), (C_EBREAK)>;
+def : CompressPat<(UNIMP), (C_UNIMP)>;
def : CompressPat<(JALR X1, GPRNoX0:$rs1, 0),
(C_JALR GPRNoX0:$rs1)>;
def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs1, GPRNoX0:$rs2),
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 06b834d55ade..9f1cd50de595 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -212,13 +212,8 @@ let Predicates = [HasStdExtD] in {
def : Pat<(fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>;
def : Pat<(fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>;
-// FP->[u]int. Round-to-zero must be used
-def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_W_D FPR64:$rs1, 0b001)>;
-def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_WU_D FPR64:$rs1, 0b001)>;
-
-// [u]int->fp
-def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_W GPR:$rs1)>;
-def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_WU GPR:$rs1)>;
+// [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so
+// are defined later.
/// Float arithmetic operations
@@ -235,6 +230,22 @@ def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>;
def : PatFpr64Fpr64<fcopysign, FSGNJ_D>;
def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>;
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3),
+ (FMADD_D $rs1, $rs2, $rs3, 0b111)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)),
+ (FMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
+ (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
+ (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
// The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
// canonical NaN when given a signaling NaN. This doesn't match the LLVM
// behaviour (see https://bugs.llvm.org/show_bug.cgi?id=27363). However, the
@@ -287,3 +298,13 @@ def SplitF64Pseudo
[(set GPR:$dst1, GPR:$dst2, (RISCVSplitF64 FPR64:$src))]>;
} // Predicates = [HasStdExtD]
+
+let Predicates = [HasStdExtD, IsRV32] in {
+// double->[u]int. Round-to-zero must be used.
+def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_W_D FPR64:$rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_WU_D FPR64:$rs1, 0b001)>;
+
+// [u]int->double.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_W GPR:$rs1)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_WU GPR:$rs1)>;
+} // Predicates = [HasStdExtD, IsRV32]
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 6d7c59becf24..03bdac45873d 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -27,7 +27,7 @@ def FRMArg : AsmOperandClass {
def frmarg : Operand<XLenVT> {
let ParserMatchClass = FRMArg;
let PrintMethod = "printFRMArg";
- let DecoderMethod = "decodeUImmOperand<3>";
+ let DecoderMethod = "decodeFRMArg";
}
//===----------------------------------------------------------------------===//
@@ -252,13 +252,8 @@ let Predicates = [HasStdExtF] in {
def : Pat<(bitconvert GPR:$rs1), (FMV_W_X GPR:$rs1)>;
def : Pat<(bitconvert FPR32:$rs1), (FMV_X_W FPR32:$rs1)>;
-// FP->[u]int. Round-to-zero must be used
-def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
-
-// [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
-def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
+// [u]int32<->float conversion patterns must be gated on IsRV32 or IsRV64, so
+// are defined later.
/// Float arithmetic operations
@@ -275,6 +270,22 @@ def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>;
def : PatFpr32Fpr32<fcopysign, FSGNJ_S>;
def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>;
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3),
+ (FMADD_S $rs1, $rs2, $rs3, 0b111)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)),
+ (FMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
+ (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
+ (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
// The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
// canonical NaN when given a signaling NaN. This doesn't match the LLVM
// behaviour (see https://bugs.llvm.org/show_bug.cgi?id=27363). However, the
@@ -313,3 +324,13 @@ defm : LdPat<load, FLW>;
defm : StPat<store, FSW, FPR32>;
} // Predicates = [HasStdExtF]
+
+let Predicates = [HasStdExtF, IsRV32] in {
+// float->[u]int. Round-to-zero must be used.
+def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
+
+// [u]int->float. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
+} // Predicates = [HasStdExtF, IsRV32]
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 2dd10ada4003..05dd3311ad54 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -49,3 +49,34 @@ def : PatGprGpr<udiv, DIVU>;
def : PatGprGpr<srem, REM>;
def : PatGprGpr<urem, REMU>;
} // Predicates = [HasStdExtM]
+
+let Predicates = [HasStdExtM, IsRV64] in {
+def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
+ (MULW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
+ (sexti32 GPR:$rs2)), i32),
+ (DIVW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
+ (sexti32 GPR:$rs2))),
+ (SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
+def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
+ (DIVUW GPR:$rs1, GPR:$rs2)>;
+// It's cheaper to perform a divuw and zero-extend the result than to
+// zero-extend both inputs to a udiv.
+def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
+ (SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+// Although the sexti32 operands may not have originated from an i32 srem,
+// this pattern is safe as it is impossible for two sign extended inputs to
+// produce a result where res[63:32]=0 and res[31]=1.
+def : Pat<(srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)),
+ (REMW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1),
+ (sexti32 GPR:$rs2)), i32),
+ (REMW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
+ (REMUW GPR:$rs1, GPR:$rs2)>;
+// It's cheaper to perform a remuw and zero-extend the result than to
+// zero-extend both inputs to a urem.
+def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
+ (SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+} // Predicates = [HasStdExtM, IsRV64]
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index b8fa8a97d41a..cea009c5447d 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -62,7 +62,7 @@ private:
MachineRegisterInfo *MRI;
std::set<MachineInstr *> DeadInstrs;
};
-}; // end anonymous namespace
+} // end anonymous namespace
char RISCVMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset",
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td
new file mode 100644
index 000000000000..f1b7984ffe6b
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -0,0 +1,352 @@
+//===- RISCVSystemOperands.td ----------------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the symbolic operands permitted for various kinds of
+// RISC-V system instructions.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/TableGen/SearchableTable.td"
+
+//===----------------------------------------------------------------------===//
+// CSR (control and status register read/write) instruction options.
+//===----------------------------------------------------------------------===//
+
+class SysReg<string name, bits<12> op> {
+ string Name = name;
+ bits<12> Encoding = op;
+ // FIXME: add these additional fields when needed.
+ // Privilege Access: Read and Write = 0, 1, 2; Read-Only = 3.
+ // Privilege Mode: User = 0, System = 1 or Machine = 3.
+ // bits<2> ReadWrite = op{11 - 10};
+ // bits<2> XMode = op{9 - 8};
+ // Check Extra field name and what bits 7-6 correspond to.
+ // bits<2> Extra = op{7 - 6};
+ // Register number without the privilege bits.
+ // bits<6> Number = op{5 - 0};
+ code FeaturesRequired = [{ {} }];
+ bit isRV32Only = 0;
+}
+
+def SysRegsList : GenericTable {
+ let FilterClass = "SysReg";
+ // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed.
+ let Fields = [ "Name", "Encoding", "FeaturesRequired", "isRV32Only" ];
+
+ let PrimaryKey = [ "Encoding" ];
+ let PrimaryKeyName = "lookupSysRegByEncoding";
+}
+
+def lookupSysRegByName : SearchIndex {
+ let Table = SysRegsList;
+ let Key = [ "Name" ];
+}
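+
+// TableGen's SearchableTables backend is expected to generate lookup helpers
+// from the table and index above, e.g. (illustrative)
+// RISCVSysReg::lookupSysRegByName("mstatus") and
+// RISCVSysReg::lookupSysRegByEncoding(0x300) both returning the same entry.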
+
+// The following CSR encodings match those given in Tables 2.2,
+// 2.3, 2.4 and 2.5 in the RISC-V Instruction Set Manual
+// Volume II: Privileged Architecture.
+
+//===--------------------------
+// User Trap Setup
+//===--------------------------
+def : SysReg<"ustatus", 0x000>;
+def : SysReg<"uie", 0x004>;
+def : SysReg<"utvec", 0x005>;
+
+//===--------------------------
+// User Trap Handling
+//===--------------------------
+def : SysReg<"uscratch", 0x040>;
+def : SysReg<"uepc", 0x041>;
+def : SysReg<"ucause", 0x042>;
+def : SysReg<"utval", 0x043>;
+def : SysReg<"uip", 0x044>;
+
+//===--------------------------
+// User Floating-Point CSRs
+//===--------------------------
+
+let FeaturesRequired = [{ {RISCV::FeatureStdExtF} }] in {
+def : SysReg<"fflags", 0x001>;
+def : SysReg<"frm", 0x002>;
+def : SysReg<"fcsr", 0x003>;
+}
+
+//===--------------------------
+// User Counter/Timers
+//===--------------------------
+def : SysReg<"cycle", 0xC00>;
+def : SysReg<"time", 0xC01>;
+def : SysReg<"instret", 0xC02>;
+
+def : SysReg<"hpmcounter3", 0xC03>;
+def : SysReg<"hpmcounter4", 0xC04>;
+def : SysReg<"hpmcounter5", 0xC05>;
+def : SysReg<"hpmcounter6", 0xC06>;
+def : SysReg<"hpmcounter7", 0xC07>;
+def : SysReg<"hpmcounter8", 0xC08>;
+def : SysReg<"hpmcounter9", 0xC09>;
+def : SysReg<"hpmcounter10", 0xC0A>;
+def : SysReg<"hpmcounter11", 0xC0B>;
+def : SysReg<"hpmcounter12", 0xC0C>;
+def : SysReg<"hpmcounter13", 0xC0D>;
+def : SysReg<"hpmcounter14", 0xC0E>;
+def : SysReg<"hpmcounter15", 0xC0F>;
+def : SysReg<"hpmcounter16", 0xC10>;
+def : SysReg<"hpmcounter17", 0xC11>;
+def : SysReg<"hpmcounter18", 0xC12>;
+def : SysReg<"hpmcounter19", 0xC13>;
+def : SysReg<"hpmcounter20", 0xC14>;
+def : SysReg<"hpmcounter21", 0xC15>;
+def : SysReg<"hpmcounter22", 0xC16>;
+def : SysReg<"hpmcounter23", 0xC17>;
+def : SysReg<"hpmcounter24", 0xC18>;
+def : SysReg<"hpmcounter25", 0xC19>;
+def : SysReg<"hpmcounter26", 0xC1A>;
+def : SysReg<"hpmcounter27", 0xC1B>;
+def : SysReg<"hpmcounter28", 0xC1C>;
+def : SysReg<"hpmcounter29", 0xC1D>;
+def : SysReg<"hpmcounter30", 0xC1E>;
+def : SysReg<"hpmcounter31", 0xC1F>;
+
+let isRV32Only = 1 in {
+def: SysReg<"cycleh", 0xC80>;
+def: SysReg<"timeh", 0xC81>;
+def: SysReg<"instreth", 0xC82>;
+
+def: SysReg<"hpmcounter3h", 0xC83>;
+def: SysReg<"hpmcounter4h", 0xC84>;
+def: SysReg<"hpmcounter5h", 0xC85>;
+def: SysReg<"hpmcounter6h", 0xC86>;
+def: SysReg<"hpmcounter7h", 0xC87>;
+def: SysReg<"hpmcounter8h", 0xC88>;
+def: SysReg<"hpmcounter9h", 0xC89>;
+def: SysReg<"hpmcounter10h", 0xC8A>;
+def: SysReg<"hpmcounter11h", 0xC8B>;
+def: SysReg<"hpmcounter12h", 0xC8C>;
+def: SysReg<"hpmcounter13h", 0xC8D>;
+def: SysReg<"hpmcounter14h", 0xC8E>;
+def: SysReg<"hpmcounter15h", 0xC8F>;
+def: SysReg<"hpmcounter16h", 0xC90>;
+def: SysReg<"hpmcounter17h", 0xC91>;
+def: SysReg<"hpmcounter18h", 0xC92>;
+def: SysReg<"hpmcounter19h", 0xC93>;
+def: SysReg<"hpmcounter20h", 0xC94>;
+def: SysReg<"hpmcounter21h", 0xC95>;
+def: SysReg<"hpmcounter22h", 0xC96>;
+def: SysReg<"hpmcounter23h", 0xC97>;
+def: SysReg<"hpmcounter24h", 0xC98>;
+def: SysReg<"hpmcounter25h", 0xC99>;
+def: SysReg<"hpmcounter26h", 0xC9A>;
+def: SysReg<"hpmcounter27h", 0xC9B>;
+def: SysReg<"hpmcounter28h", 0xC9C>;
+def: SysReg<"hpmcounter29h", 0xC9D>;
+def: SysReg<"hpmcounter30h", 0xC9E>;
+def: SysReg<"hpmcounter31h", 0xC9F>;
+}
+
+//===--------------------------
+// Supervisor Trap Setup
+//===--------------------------
+def : SysReg<"sstatus", 0x100>;
+def : SysReg<"sedeleg", 0x102>;
+def : SysReg<"sideleg", 0x103>;
+def : SysReg<"sie", 0x104>;
+def : SysReg<"stvec", 0x105>;
+def : SysReg<"scounteren", 0x106>;
+
+//===--------------------------
+// Supervisor Trap Handling
+//===--------------------------
+def : SysReg<"sscratch", 0x140>;
+def : SysReg<"sepc", 0x141>;
+def : SysReg<"scause", 0x142>;
+def : SysReg<"stval", 0x143>;
+def : SysReg<"sip", 0x144>;
+
+//===-------------------------------------
+// Supervisor Protection and Translation
+//===-------------------------------------
+def : SysReg<"satp", 0x180>;
+
+//===-----------------------------
+// Machine Information Registers
+//===-----------------------------
+
+def : SysReg<"mvendorid", 0xF11>;
+def : SysReg<"marchid", 0xF12>;
+def : SysReg<"mimpid", 0xF13>;
+def : SysReg<"mhartid", 0xF14>;
+
+//===-----------------------------
+// Machine Trap Setup
+//===-----------------------------
+def : SysReg<"mstatus", 0x300>;
+def : SysReg<"misa", 0x301>;
+def : SysReg<"medeleg", 0x302>;
+def : SysReg<"mideleg", 0x303>;
+def : SysReg<"mie", 0x304>;
+def : SysReg<"mtvec", 0x305>;
+def : SysReg<"mcounteren", 0x306>;
+
+//===-----------------------------
+// Machine Trap Handling
+//===-----------------------------
+def : SysReg<"mscratch", 0x340>;
+def : SysReg<"mepc", 0x341>;
+def : SysReg<"mcause", 0x342>;
+def : SysReg<"mtval", 0x343>;
+def : SysReg<"mip", 0x344>;
+
+//===----------------------------------
+// Machine Protection and Translation
+//===----------------------------------
+def : SysReg<"pmpcfg0", 0x3A0>;
+def : SysReg<"pmpcfg2", 0x3A2>;
+let isRV32Only = 1 in {
+def : SysReg<"pmpcfg1", 0x3A1>;
+def : SysReg<"pmpcfg3", 0x3A3>;
+}
+
+def : SysReg<"pmpaddr0", 0x3B0>;
+def : SysReg<"pmpaddr1", 0x3B1>;
+def : SysReg<"pmpaddr2", 0x3B2>;
+def : SysReg<"pmpaddr3", 0x3B3>;
+def : SysReg<"pmpaddr4", 0x3B4>;
+def : SysReg<"pmpaddr5", 0x3B5>;
+def : SysReg<"pmpaddr6", 0x3B6>;
+def : SysReg<"pmpaddr7", 0x3B7>;
+def : SysReg<"pmpaddr8", 0x3B8>;
+def : SysReg<"pmpaddr9", 0x3B9>;
+def : SysReg<"pmpaddr10", 0x3BA>;
+def : SysReg<"pmpaddr11", 0x3BB>;
+def : SysReg<"pmpaddr12", 0x3BC>;
+def : SysReg<"pmpaddr13", 0x3BD>;
+def : SysReg<"pmpaddr14", 0x3BE>;
+def : SysReg<"pmpaddr15", 0x3BF>;
+
+
+//===--------------------------
+// Machine Counter and Timers
+//===--------------------------
+def : SysReg<"mcycle", 0xB00>;
+def : SysReg<"minstret", 0xB02>;
+
+def : SysReg<"mhpmcounter3", 0xB03>;
+def : SysReg<"mhpmcounter4", 0xB04>;
+def : SysReg<"mhpmcounter5", 0xB05>;
+def : SysReg<"mhpmcounter6", 0xB06>;
+def : SysReg<"mhpmcounter7", 0xB07>;
+def : SysReg<"mhpmcounter8", 0xB08>;
+def : SysReg<"mhpmcounter9", 0xB09>;
+def : SysReg<"mhpmcounter10", 0xB0A>;
+def : SysReg<"mhpmcounter11", 0xB0B>;
+def : SysReg<"mhpmcounter12", 0xB0C>;
+def : SysReg<"mhpmcounter13", 0xB0D>;
+def : SysReg<"mhpmcounter14", 0xB0E>;
+def : SysReg<"mhpmcounter15", 0xB0F>;
+def : SysReg<"mhpmcounter16", 0xB10>;
+def : SysReg<"mhpmcounter17", 0xB11>;
+def : SysReg<"mhpmcounter18", 0xB12>;
+def : SysReg<"mhpmcounter19", 0xB13>;
+def : SysReg<"mhpmcounter20", 0xB14>;
+def : SysReg<"mhpmcounter21", 0xB15>;
+def : SysReg<"mhpmcounter22", 0xB16>;
+def : SysReg<"mhpmcounter23", 0xB17>;
+def : SysReg<"mhpmcounter24", 0xB18>;
+def : SysReg<"mhpmcounter25", 0xB19>;
+def : SysReg<"mhpmcounter26", 0xB1A>;
+def : SysReg<"mhpmcounter27", 0xB1B>;
+def : SysReg<"mhpmcounter28", 0xB1C>;
+def : SysReg<"mhpmcounter29", 0xB1D>;
+def : SysReg<"mhpmcounter30", 0xB1E>;
+def : SysReg<"mhpmcounter31", 0xB1F>;
+
+let isRV32Only = 1 in {
+def: SysReg<"mcycleh", 0xB80>;
+def: SysReg<"minstreth", 0xB82>;
+
+def: SysReg<"mhpmcounter3h", 0xB83>;
+def: SysReg<"mhpmcounter4h", 0xB84>;
+def: SysReg<"mhpmcounter5h", 0xB85>;
+def: SysReg<"mhpmcounter6h", 0xB86>;
+def: SysReg<"mhpmcounter7h", 0xB87>;
+def: SysReg<"mhpmcounter8h", 0xB88>;
+def: SysReg<"mhpmcounter9h", 0xB89>;
+def: SysReg<"mhpmcounter10h", 0xB8A>;
+def: SysReg<"mhpmcounter11h", 0xB8B>;
+def: SysReg<"mhpmcounter12h", 0xB8C>;
+def: SysReg<"mhpmcounter13h", 0xB8D>;
+def: SysReg<"mhpmcounter14h", 0xB8E>;
+def: SysReg<"mhpmcounter15h", 0xB8F>;
+def: SysReg<"mhpmcounter16h", 0xB90>;
+def: SysReg<"mhpmcounter17h", 0xB91>;
+def: SysReg<"mhpmcounter18h", 0xB92>;
+def: SysReg<"mhpmcounter19h", 0xB93>;
+def: SysReg<"mhpmcounter20h", 0xB94>;
+def: SysReg<"mhpmcounter21h", 0xB95>;
+def: SysReg<"mhpmcounter22h", 0xB96>;
+def: SysReg<"mhpmcounter23h", 0xB97>;
+def: SysReg<"mhpmcounter24h", 0xB98>;
+def: SysReg<"mhpmcounter25h", 0xB99>;
+def: SysReg<"mhpmcounter26h", 0xB9A>;
+def: SysReg<"mhpmcounter27h", 0xB9B>;
+def: SysReg<"mhpmcounter28h", 0xB9C>;
+def: SysReg<"mhpmcounter29h", 0xB9D>;
+def: SysReg<"mhpmcounter30h", 0xB9E>;
+def: SysReg<"mhpmcounter31h", 0xB9F>;
+}
+
+//===--------------------------
+// Machine Counter Setup
+//===--------------------------
+def : SysReg<"mhpmevent3", 0x323>;
+def : SysReg<"mhpmevent4", 0x324>;
+def : SysReg<"mhpmevent5", 0x325>;
+def : SysReg<"mhpmevent6", 0x326>;
+def : SysReg<"mhpmevent7", 0x327>;
+def : SysReg<"mhpmevent8", 0x328>;
+def : SysReg<"mhpmevent9", 0x329>;
+def : SysReg<"mhpmevent10", 0x32A>;
+def : SysReg<"mhpmevent11", 0x32B>;
+def : SysReg<"mhpmevent12", 0x32C>;
+def : SysReg<"mhpmevent13", 0x32D>;
+def : SysReg<"mhpmevent14", 0x32E>;
+def : SysReg<"mhpmevent15", 0x32F>;
+def : SysReg<"mhpmevent16", 0x330>;
+def : SysReg<"mhpmevent17", 0x331>;
+def : SysReg<"mhpmevent18", 0x332>;
+def : SysReg<"mhpmevent19", 0x333>;
+def : SysReg<"mhpmevent20", 0x334>;
+def : SysReg<"mhpmevent21", 0x335>;
+def : SysReg<"mhpmevent22", 0x336>;
+def : SysReg<"mhpmevent23", 0x337>;
+def : SysReg<"mhpmevent24", 0x338>;
+def : SysReg<"mhpmevent25", 0x339>;
+def : SysReg<"mhpmevent26", 0x33A>;
+def : SysReg<"mhpmevent27", 0x33B>;
+def : SysReg<"mhpmevent28", 0x33C>;
+def : SysReg<"mhpmevent29", 0x33D>;
+def : SysReg<"mhpmevent30", 0x33E>;
+def : SysReg<"mhpmevent31", 0x33F>;
+
+//===-----------------------------------------------
+// Debug/Trace Registers (shared with Debug Mode)
+//===-----------------------------------------------
+def : SysReg<"tselect", 0x7A0>;
+def : SysReg<"tdata1", 0x7A1>;
+def : SysReg<"tdata2", 0x7A2>;
+def : SysReg<"tdata3", 0x7A3>;
+
+//===-----------------------------------------------
+// Debug Mode Registers
+//===-----------------------------------------------
+def : SysReg<"dcsr", 0x7B0>;
+def : SysReg<"dpc", 0x7B1>;
+def : SysReg<"dscratch", 0x7B2>;
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index a2ebf5bf3e6b..8937ec200bd7 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -27,6 +27,8 @@ using namespace llvm;
extern "C" void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
+ auto PR = PassRegistry::getPassRegistry();
+ initializeRISCVExpandPseudoPass(*PR);
}
static std::string computeDataLayout(const Triple &TT) {
@@ -45,12 +47,6 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -59,7 +55,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<RISCVELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -78,6 +74,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
void addPreEmitPass() override;
+ void addPreEmitPass2() override;
void addPreRegAlloc() override;
};
}
@@ -99,6 +96,13 @@ bool RISCVPassConfig::addInstSelector() {
void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
+void RISCVPassConfig::addPreEmitPass2() {
+ // Schedule the expansion of AMOs at the last possible moment, avoiding the
+ // possibility for other passes to break the requirements for forward
+ // progress in the LR/SC block.
+ addPass(createRISCVExpandPseudoPass());
+}
+
void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVMergeBaseOffsetOptPass());
}
diff --git a/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
new file mode 100644
index 000000000000..964af1f74cec
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
@@ -0,0 +1,9 @@
+#include "RISCVBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace RISCVSysReg {
+#define GET_SysRegsList_IMPL
+#include "RISCVGenSystemOperands.inc"
+} // namespace RISCVSysReg
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
index b278a2ed3903..372e0e80bbaf 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
@@ -14,9 +14,10 @@
#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
-#include "RISCVMCTargetDesc.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/SubtargetFeature.h"
namespace llvm {
@@ -38,9 +39,10 @@ enum {
InstFormatCIW = 11,
InstFormatCL = 12,
InstFormatCS = 13,
- InstFormatCB = 14,
- InstFormatCJ = 15,
- InstFormatOther = 16,
+ InstFormatCA = 14,
+ InstFormatCB = 15,
+ InstFormatCJ = 16,
+ InstFormatOther = 17,
InstFormatMask = 31
};
@@ -104,7 +106,53 @@ inline static RoundingMode stringToRoundingMode(StringRef Str) {
.Case("dyn", RISCVFPRndMode::DYN)
.Default(RISCVFPRndMode::Invalid);
}
+
+inline static bool isValidRoundingMode(unsigned Mode) {
+ switch (Mode) {
+ default:
+ return false;
+ case RISCVFPRndMode::RNE:
+ case RISCVFPRndMode::RTZ:
+ case RISCVFPRndMode::RDN:
+ case RISCVFPRndMode::RUP:
+ case RISCVFPRndMode::RMM:
+ case RISCVFPRndMode::DYN:
+ return true;
+ }
+}
} // namespace RISCVFPRndMode
+
+namespace RISCVSysReg {
+struct SysReg {
+ const char *Name;
+ unsigned Encoding;
+ // FIXME: add these additional fields when needed.
+ // Privilege Access: Read, Write, Read-Only.
+ // unsigned ReadWrite;
+ // Privilege Mode: User, System or Machine.
+ // unsigned Mode;
+ // Check field name.
+ // unsigned Extra;
+ // Register number without the privilege bits.
+ // unsigned Number;
+ FeatureBitset FeaturesRequired;
+ bool isRV32Only;
+
+ bool haveRequiredFeatures(FeatureBitset ActiveFeatures) const {
+ // Not in 32-bit mode.
+ if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit])
+ return false;
+ // No required feature associated with the system register.
+ if (FeaturesRequired.none())
+ return true;
+ return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
+ }
+};
+
+#define GET_SysRegsList_DECL
+#include "RISCVGenSystemOperands.inc"
+} // end namespace RISCVSysReg
+
} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
new file mode 100644
index 000000000000..3dc298246bc5
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
@@ -0,0 +1,79 @@
+//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMatInt.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdint>
+
+namespace llvm {
+
+namespace RISCVMatInt {
+void generateInstSeq(int64_t Val, bool Is64Bit, InstSeq &Res) {
+ if (isInt<32>(Val)) {
+ // Depending on the active bits in the immediate Value v, the following
+ // instruction sequences are emitted:
+ //
+ // v == 0 : ADDI
+ // v[0,12) != 0 && v[12,32) == 0 : ADDI
+ // v[0,12) == 0 && v[12,32) != 0 : LUI
+ // v[0,32) != 0 : LUI+ADDI(W)
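+ //
+ // For example (illustrative): v = 0x12345678 gives Hi20 = 0x12345 and
+ // Lo12 = 0x678, i.e. LUI 0x12345 followed by ADDI (ADDIW on RV64) 0x678.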
+ int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
+ int64_t Lo12 = SignExtend64<12>(Val);
+
+ if (Hi20)
+ Res.push_back(Inst(RISCV::LUI, Hi20));
+
+ if (Lo12 || Hi20 == 0) {
+ unsigned AddiOpc = (Is64Bit && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
+ Res.push_back(Inst(AddiOpc, Lo12));
+ }
+ return;
+ }
+
+ assert(Is64Bit && "Can't emit >32-bit imm for non-RV64 target");
+
+ // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
+// (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
+ // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
+ // while the following ADDI instructions contribute up to 12 bits each.
+ //
+// At first glance, implementing this seems possible by simply emitting the
+// most significant 32 bits (LUI+ADDIW) followed by as many left shifts (SLLI)
+// and immediate additions (ADDI) as needed. However, because ADDI performs a
+// sign-extended addition, doing it that way would only work when at most 11
+// bits of the ADDI immediates are used. Using all 12 bits of the ADDI
+// immediates, as GAS does, actually
+ // requires that the constant is processed starting with the least significant
+ // bit.
+ //
+ // In the following, constants are processed from LSB to MSB but instruction
+ // emission is performed from MSB to LSB by recursively calling
+ // generateInstSeq. In each recursion, first the lowest 12 bits are removed
+ // from the constant and the optimal shift amount, which can be greater than
+ // 12 bits if the constant is sparse, is determined. Then, the shifted
+ // remaining constant is processed recursively and gets emitted as soon as it
+ // fits into 32 bits. The emission of the shifts and additions is subsequently
+ // performed when the recursion returns.
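+ //
+ // For example (illustrative): Val = 0x100001234 splits into Lo12 = 0x234 and
+ // Hi52 = 0x100001 with ShiftAmount = 12, yielding the sequence
+ // LUI 0x100, ADDIW 1, SLLI 12, ADDI 0x234.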
+
+ int64_t Lo12 = SignExtend64<12>(Val);
+ int64_t Hi52 = (Val + 0x800) >> 12;
+ int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
+ Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
+
+ generateInstSeq(Hi52, Is64Bit, Res);
+
+ Res.push_back(Inst(RISCV::SLLI, ShiftAmount));
+ if (Lo12)
+ Res.push_back(Inst(RISCV::ADDI, Lo12));
+}
+} // namespace RISCVMatInt
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h
new file mode 100644
index 000000000000..49d1d89adc7a
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h
@@ -0,0 +1,36 @@
+//===- RISCVMatInt.h - Immediate materialisation ---------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MATINT_H
+#define LLVM_LIB_TARGET_RISCV_MATINT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MachineValueType.h"
+#include <cstdint>
+
+namespace llvm {
+
+namespace RISCVMatInt {
+struct Inst {
+ unsigned Opc;
+ int64_t Imm;
+
+ Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {}
+};
+using InstSeq = SmallVector<Inst, 8>;
+
+// Helper to generate an instruction sequence that will materialise the given
+// immediate value into a register. A sequence of instructions represented by
+// a simple struct is produced rather than directly emitting the instructions in
+// order to allow this helper to be used from both the MC layer and during
+// instruction selection.
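+//
+// Typical use (illustrative; IsRV64 is whatever 64-bit flag the caller has):
+//   RISCVMatInt::InstSeq Seq;
+//   RISCVMatInt::generateInstSeq(Imm, IsRV64, Seq);
+//   for (const RISCVMatInt::Inst &I : Seq)
+//     ... emit I.Opc with immediate operand I.Imm ...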
+void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res);
+} // namespace RISCVMatInt
+} // namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 35f52f7d279b..691421e533ea 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -78,6 +78,8 @@ class SparcAsmParser : public MCTargetAsmParser {
// Custom parse functions for Sparc specific operands.
OperandMatchResultTy parseMEMOperand(OperandVector &Operands);
+ OperandMatchResultTy parseMembarTag(OperandVector &Operands);
+
OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name);
OperandMatchResultTy
@@ -256,6 +258,7 @@ public:
bool isMem() const override { return isMEMrr() || isMEMri(); }
bool isMEMrr() const { return Kind == k_MemoryReg; }
bool isMEMri() const { return Kind == k_MemoryImm; }
+ bool isMembarTag() const { return Kind == k_Immediate; }
bool isIntReg() const {
return (Kind == k_Register && Reg.Kind == rk_IntReg);
@@ -366,6 +369,12 @@ public:
addExpr(Inst, Expr);
}
+ void addMembarTagOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCExpr *Expr = getImm();
+ addExpr(Inst, Expr);
+ }
+
static std::unique_ptr<SparcOperand> CreateToken(StringRef Str, SMLoc S) {
auto Op = make_unique<SparcOperand>(k_Token);
Op->Tok.Data = Str.data();
@@ -742,6 +751,52 @@ SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
return MatchOperand_Success;
}
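+
+// Parse a MEMBAR instruction operand: either a plain immediate mask (0-127)
+// or a '|'-separated list of '#'-prefixed tags, e.g. (illustrative)
+// "membar #LoadLoad | #StoreStore" yields the mask 0x1 | 0x8 = 0x9.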
+OperandMatchResultTy SparcAsmParser::parseMembarTag(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const MCExpr *EVal;
+ int64_t ImmVal = 0;
+
+ std::unique_ptr<SparcOperand> Mask;
+ if (parseSparcAsmOperand(Mask) == MatchOperand_Success) {
+ if (!Mask->isImm() || !Mask->getImm()->evaluateAsAbsolute(ImmVal) ||
+ ImmVal < 0 || ImmVal > 127) {
+ Error(S, "invalid membar mask number");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ while (getLexer().getKind() == AsmToken::Hash) {
+ SMLoc TagStart = getLexer().getLoc();
+ Parser.Lex(); // Eat the '#'.
+ unsigned MaskVal = StringSwitch<unsigned>(Parser.getTok().getString())
+ .Case("LoadLoad", 0x1)
+ .Case("StoreLoad", 0x2)
+ .Case("LoadStore", 0x4)
+ .Case("StoreStore", 0x8)
+ .Case("Lookaside", 0x10)
+ .Case("MemIssue", 0x20)
+ .Case("Sync", 0x40)
+ .Default(0);
+
+ Parser.Lex(); // Eat the identifier token.
+
+ if (!MaskVal) {
+ Error(TagStart, "unknown membar tag");
+ return MatchOperand_ParseFail;
+ }
+
+ ImmVal |= MaskVal;
+
+ if (getLexer().getKind() == AsmToken::Pipe)
+ Parser.Lex(); // Eat the '|'.
+ }
+
+ EVal = MCConstantExpr::create(ImmVal, getContext());
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(SparcOperand::CreateImm(EVal, S, E));
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy
SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
diff --git a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 3e30dae1537f..0045e63a824e 100644
--- a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -11,9 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
-#include "SparcRegisterInfo.h"
-#include "SparcSubtarget.h"
+#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index c1512cbdc44f..d152efae6d1f 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -195,3 +195,26 @@ bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum,
llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX.");
return true;
}
+
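+// Print the membar mask symbolically where possible; e.g. (illustrative) an
+// immediate of 0x9 prints as "#LoadLoad | #StoreStore", while values above
+// 127 fall back to the raw number.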
+void SparcInstPrinter::printMembarTag(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ static const char *const TagNames[] = {
+ "#LoadLoad", "#StoreLoad", "#LoadStore", "#StoreStore",
+ "#Lookaside", "#MemIssue", "#Sync"};
+
+ unsigned Imm = MI->getOperand(opNum).getImm();
+
+ if (Imm > 127) {
+ O << Imm;
+ return;
+ }
+
+ bool First = true;
+ for (unsigned i = 0; i < sizeof(TagNames) / sizeof(char *); i++) {
+ if (Imm & (1 << i)) {
+ O << (First ? "" : " | ") << TagNames[i];
+ First = false;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index 6f06d1ddae32..89015eb137c2 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -49,6 +49,8 @@ public:
raw_ostream &OS);
bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &OS);
+ void printMembarTag(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/LeonFeatures.td b/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
index a7dea068cb11..61e5f16e0a1e 100755
--- a/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
+++ b/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
@@ -58,3 +58,7 @@ def FixAllFDIVSQRT : SubtargetFeature<
"true",
"LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store"
>;
+
+def LeonCycleCounter
+ : SubtargetFeature<"leoncyclecounter", "HasLeonCycleCounter", "true",
+ "Use the Leon cycle counter register">;
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 5f5e2ef7d45a..d7f1e3a1ab1d 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -100,6 +100,20 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
}
}
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ return 4;
+ case FK_Data_1:
+ return 1;
+ case FK_Data_2:
+ return 2;
+ case FK_Data_8:
+ return 8;
+ }
+}
+
namespace {
class SparcAsmBackend : public MCAsmBackend {
protected:
@@ -290,13 +304,13 @@ namespace {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
unsigned Offset = Fixup.getOffset();
-
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the
// appropriate bitfields above.
- for (unsigned i = 0; i != 4; ++i) {
- unsigned Idx = Endian == support::little ? i : 3 - i;
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = Endian == support::little ? i : (NumBytes - 1) - i;
Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index f736a37a266c..4ddb72643a91 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "SparcMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Object/ELF.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetStreamer.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
index 3b503503abce..8bb418e39ab4 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetStreamer.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_SPARC_SPARCTARGETSTREAMER_H
-#define LLVM_LIB_TARGET_SPARC_SPARCTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCTARGETSTREAMER_H
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCStreamer.h"
@@ -33,7 +33,6 @@ public:
SparcTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
void emitSparcRegisterIgnore(unsigned reg) override;
void emitSparcRegisterScratch(unsigned reg) override;
-
};
// This part is for ELF object output
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm/lib/Target/Sparc/Sparc.td
index 2f9b57f76041..0412215be8ab 100644
--- a/contrib/llvm/lib/Target/Sparc/Sparc.td
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.td
@@ -49,6 +49,9 @@ def FeatureVIS3
def FeatureLeon
: SubtargetFeature<"leon", "IsLeon", "true",
"Enable LEON extensions">;
+def FeaturePWRPSR
+ : SubtargetFeature<"leonpwrpsr", "HasPWRPSR", "true",
+ "Enable the PWRPSR instruction">;
def FeatureHardQuad
: SubtargetFeature<"hard-quad-float", "HasHardQuad", "true",
@@ -159,7 +162,8 @@ def : Processor<"leon4", LEON4Itineraries,
// LEON 4 FT (GR740)
// TO DO: Place-holder: Processor specific features will be added *very* soon here.
def : Processor<"gr740", LEON4Itineraries,
- [FeatureLeon, UMACSMACSupport, LeonCASA]>;
+ [FeatureLeon, UMACSMACSupport, LeonCASA, LeonCycleCounter,
+ FeaturePWRPSR]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 19fb94534b25..5f0e359a3b00 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -14,10 +14,10 @@
#include "InstPrinter/SparcInstPrinter.h"
#include "MCTargetDesc/SparcMCExpr.h"
+#include "MCTargetDesc/SparcTargetStreamer.h"
#include "Sparc.h"
#include "SparcInstrInfo.h"
#include "SparcTargetMachine.h"
-#include "SparcTargetStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index b04c6b112682..ae2257618a55 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -780,6 +780,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
const unsigned StackOffset = 92;
bool hasStructRetAttr = false;
+ unsigned SRetArgSize = 0;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
i != e;
@@ -824,6 +825,11 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
hasStructRetAttr = true;
+ // sret only allowed on first argument
+ assert(Outs[realArgIdx].OrigArgIndex == 0);
+ PointerType *Ty = cast<PointerType>(CLI.getArgs()[0].Ty);
+ Type *ElementTy = Ty->getElementType();
+ SRetArgSize = DAG.getDataLayout().getTypeAllocSize(ElementTy);
continue;
}
@@ -846,12 +852,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
if (VA.getLocVT() == MVT::f64) {
// Move from the float value from float registers into the
// integer registers.
-
- // TODO: The f64 -> v2i32 conversion is super-inefficient for
- // constants: it sticks them in the constant pool, then loads
- // to a fp register, then stores to temp memory, then loads to
- // integer registers.
- Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg))
+ Arg = bitcastConstantFPToInt(C, dl, DAG);
+ else
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
}
SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
@@ -932,7 +936,6 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
}
- unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;
bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
// If the callee is a GlobalAddress node (quite common, every direct call is)
@@ -1032,51 +1035,6 @@ unsigned SparcTargetLowering::getRegisterByName(const char* RegName, EVT VT,
report_fatal_error("Invalid register name global variable");
}
-// This functions returns true if CalleeName is a ABI function that returns
-// a long double (fp128).
-static bool isFP128ABICall(const char *CalleeName)
-{
- static const char *const ABICalls[] =
- { "_Q_add", "_Q_sub", "_Q_mul", "_Q_div",
- "_Q_sqrt", "_Q_neg",
- "_Q_itoq", "_Q_stoq", "_Q_dtoq", "_Q_utoq",
- "_Q_lltoq", "_Q_ulltoq",
- nullptr
- };
- for (const char * const *I = ABICalls; *I != nullptr; ++I)
- if (strcmp(CalleeName, *I) == 0)
- return true;
- return false;
-}
-
-unsigned
-SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
-{
- const Function *CalleeFn = nullptr;
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- CalleeFn = dyn_cast<Function>(G->getGlobal());
- } else if (ExternalSymbolSDNode *E =
- dyn_cast<ExternalSymbolSDNode>(Callee)) {
- const Function &F = DAG.getMachineFunction().getFunction();
- const Module *M = F.getParent();
- const char *CalleeName = E->getSymbol();
- CalleeFn = M->getFunction(CalleeName);
- if (!CalleeFn && isFP128ABICall(CalleeName))
- return 16; // Return sizeof(fp128)
- }
-
- if (!CalleeFn)
- return 0;
-
- // It would be nice to check for the sret attribute on CalleeFn here,
- // but since it is not part of the function type, any check will misfire.
-
- PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
- Type *ElementTy = Ty->getElementType();
- return DAG.getDataLayout().getTypeAllocSize(ElementTy);
-}
-
-
// Fixup floating point arguments in the ... part of a varargs call.
//
// The SPARC v9 ABI requires that floating point arguments are treated the same
@@ -1587,9 +1545,6 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
- setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
- setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
-
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
@@ -1841,6 +1796,13 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMUL, MVT::f32, Promote);
}
+ // Custom combine bitcast between f64 and v2i32
+ if (!Subtarget->is64Bit())
+ setTargetDAGCombine(ISD::BITCAST);
+
+ if (Subtarget->hasLeonCycleCounter())
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setMinFunctionAlignment(2);
@@ -1863,8 +1825,6 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
- case SPISD::EH_SJLJ_SETJMP: return "SPISD::EH_SJLJ_SETJMP";
- case SPISD::EH_SJLJ_LONGJMP: return "SPISD::EH_SJLJ_LONGJMP";
case SPISD::Hi: return "SPISD::Hi";
case SPISD::Lo: return "SPISD::Lo";
case SPISD::FTOI: return "SPISD::FTOI";
@@ -1906,8 +1866,8 @@ void SparcTargetLowering::computeKnownBitsForTargetNode
case SPISD::SELECT_ICC:
case SPISD::SELECT_XCC:
case SPISD::SELECT_FCC:
- DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1);
- DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1);
+ Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
@@ -2537,20 +2497,6 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(SPCC, dl, MVT::i32), CompareFlag);
}
-SDValue SparcTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const {
- SDLoc DL(Op);
- return DAG.getNode(SPISD::EH_SJLJ_SETJMP, DL,
- DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1));
-
-}
-
-SDValue SparcTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const {
- SDLoc DL(Op);
- return DAG.getNode(SPISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), Op.getOperand(1));
-}
-
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI) {
MachineFunction &MF = DAG.getMachineFunction();
@@ -2666,7 +2612,8 @@ static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
}
static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
- const SparcSubtarget *Subtarget) {
+ const SparcSubtarget *Subtarget,
+ bool AlwaysFlush = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
@@ -2676,17 +2623,11 @@ static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
unsigned stackBias = Subtarget->getStackPointerBias();
SDValue FrameAddr;
-
- if (depth == 0) {
- FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
- if (Subtarget->is64Bit())
- FrameAddr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr,
- DAG.getIntPtrConstant(stackBias, dl));
- return FrameAddr;
- }
+ SDValue Chain;
// flush first to make sure the windowed registers' values are in stack
- SDValue Chain = getFLUSHW(Op, DAG);
+ Chain = (depth || AlwaysFlush) ? getFLUSHW(Op, DAG) : DAG.getEntryNode();
+
FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
unsigned Offset = (Subtarget->is64Bit()) ? (stackBias + 112) : 56;
@@ -2735,7 +2676,7 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
}
// Need frame address to find return address of the caller.
- SDValue FrameAddr = getFRAMEADDR(depth - 1, Op, DAG, Subtarget);
+ SDValue FrameAddr = getFRAMEADDR(depth - 1, Op, DAG, Subtarget, true);
unsigned Offset = (Subtarget->is64Bit()) ? 120 : 60;
SDValue Ptr = DAG.getNode(ISD::ADD,
@@ -3085,8 +3026,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
hasHardQuad);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this,
hasHardQuad);
- case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG, *this);
- case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG, *this);
case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
@@ -3120,6 +3059,40 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
+SDValue SparcTargetLowering::bitcastConstantFPToInt(ConstantFPSDNode *C,
+ const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ APInt V = C->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(V.zextOrTrunc(32), DL, MVT::i32);
+ SDValue Hi = DAG.getConstant(V.lshr(32).zextOrTrunc(32), DL, MVT::i32);
+ if (DAG.getDataLayout().isLittleEndian())
+ std::swap(Lo, Hi);
+ return DAG.getBuildVector(MVT::v2i32, DL, {Hi, Lo});
+}
+
+SDValue SparcTargetLowering::PerformBITCASTCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDLoc dl(N);
+ SDValue Src = N->getOperand(0);
+
+ if (isa<ConstantFPSDNode>(Src) && N->getSimpleValueType(0) == MVT::v2i32 &&
+ Src.getSimpleValueType() == MVT::f64)
+ return bitcastConstantFPToInt(cast<ConstantFPSDNode>(Src), dl, DCI.DAG);
+
+ return SDValue();
+}
+
+SDValue SparcTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BITCAST:
+ return PerformBITCASTCombine(N, DCI);
+ }
+ return SDValue();
+}
+
MachineBasicBlock *
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -3135,13 +3108,6 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case SP::SELECT_CC_DFP_FCC:
case SP::SELECT_CC_QFP_FCC:
return expandSelectCC(MI, BB, SP::FBCOND);
- case SP::EH_SJLJ_SETJMP32ri:
- case SP::EH_SJLJ_SETJMP32rr:
- return emitEHSjLjSetJmp(MI, BB);
- case SP::EH_SJLJ_LONGJMP32rr:
- case SP::EH_SJLJ_LONGJMP32ri:
- return emitEHSjLjLongJmp(MI, BB);
-
}
}
@@ -3201,205 +3167,6 @@ SparcTargetLowering::expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
return SinkMBB;
}
-MachineBasicBlock *
-SparcTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- DebugLoc DL = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
-
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineInstrBuilder MIB;
-
- MVT PVT = getPointerTy(MF->getDataLayout());
- unsigned RegSize = PVT.getStoreSize();
- assert(PVT == MVT::i32 && "Invalid Pointer Size!");
-
- unsigned Buf = MI.getOperand(0).getReg();
- unsigned JmpLoc = MRI.createVirtualRegister(&SP::IntRegsRegClass);
-
- // TO DO: If we do 64-bit handling, this perhaps should be FLUSHW, not TA 3
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::TRAPri), SP::G0).addImm(3).addImm(SPCC::ICC_A);
-
- // Instruction to restore FP
- const unsigned FP = SP::I6;
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(FP)
- .addReg(Buf)
- .addImm(0);
-
- // Instruction to load jmp location
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(JmpLoc, RegState::Define)
- .addReg(Buf)
- .addImm(RegSize);
-
- // Instruction to restore SP
- const unsigned SP = SP::O6;
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(SP)
- .addReg(Buf)
- .addImm(2 * RegSize);
-
- // Instruction to restore I7
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(SP::I7)
- .addReg(Buf, RegState::Kill)
- .addImm(3 * RegSize);
-
- // Jump to JmpLoc
- BuildMI(*MBB, MI, DL, TII->get(SP::JMPLrr)).addReg(SP::G0).addReg(JmpLoc, RegState::Kill).addReg(SP::G0);
-
- MI.eraseFromParent();
- return MBB;
-}
-
-MachineBasicBlock *
-SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- DebugLoc DL = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
-
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineInstrBuilder MIB;
-
- MVT PVT = getPointerTy(MF->getDataLayout());
- unsigned RegSize = PVT.getStoreSize();
- assert(PVT == MVT::i32 && "Invalid Pointer Size!");
-
- unsigned DstReg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
- (void)TRI;
- unsigned mainDstReg = MRI.createVirtualRegister(RC);
- unsigned restoreDstReg = MRI.createVirtualRegister(RC);
-
- // For v = setjmp(buf), we generate
- //
- // thisMBB:
- // buf[0] = FP
- // buf[RegSize] = restoreMBB <-- takes address of restoreMBB
- // buf[RegSize * 2] = O6
- // buf[RegSize * 3] = I7
- // Ensure restoreMBB remains in the relocations list (done using a bn instruction)
- // b mainMBB
- //
- // mainMBB:
- // v_main = 0
- // b sinkMBB
- //
- // restoreMBB:
- // v_restore = 1
- // --fall through--
- //
- // sinkMBB:
- // v = phi(main, restore)
-
- const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator It = ++MBB->getIterator();
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
-
- MF->insert(It, mainMBB);
- MF->insert(It, restoreMBB);
- MF->insert(It, sinkMBB);
- restoreMBB->setHasAddressTaken();
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)),
- MBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
-
- unsigned LabelReg = MRI.createVirtualRegister(&SP::IntRegsRegClass);
- unsigned LabelReg2 = MRI.createVirtualRegister(&SP::IntRegsRegClass);
- unsigned BufReg = MI.getOperand(1).getReg();
-
- // Instruction to store FP
- const unsigned FP = SP::I6;
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(0)
- .addReg(FP);
-
- // Instructions to store jmp location
- MIB = BuildMI(thisMBB, DL, TII->get(SP::SETHIi))
- .addReg(LabelReg, RegState::Define)
- .addMBB(restoreMBB, SparcMCExpr::VK_Sparc_HI);
-
- MIB = BuildMI(thisMBB, DL, TII->get(SP::ORri))
- .addReg(LabelReg2, RegState::Define)
- .addReg(LabelReg, RegState::Kill)
- .addMBB(restoreMBB, SparcMCExpr::VK_Sparc_LO);
-
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(RegSize)
- .addReg(LabelReg2, RegState::Kill);
-
- // Instruction to store SP
- const unsigned SP = SP::O6;
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(2 * RegSize)
- .addReg(SP);
-
- // Instruction to store I7
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(3 * RegSize)
- .addReg(SP::I7);
-
-
- // FIX ME: This next instruction ensures that the restoreMBB block address remains
- // valid through optimization passes and serves no other purpose. The ICC_N ensures
- // that the branch is never taken. This commented-out code here was an alternative
- // attempt to achieve this which brought myriad problems.
- //MIB = BuildMI(thisMBB, DL, TII->get(SP::EH_SjLj_Setup)).addMBB(restoreMBB, SparcMCExpr::VK_Sparc_None);
- MIB = BuildMI(thisMBB, DL, TII->get(SP::BCOND))
- .addMBB(restoreMBB)
- .addImm(SPCC::ICC_N);
-
- MIB = BuildMI(thisMBB, DL, TII->get(SP::BCOND))
- .addMBB(mainMBB)
- .addImm(SPCC::ICC_A);
-
- thisMBB->addSuccessor(mainMBB);
- thisMBB->addSuccessor(restoreMBB);
-
-
- // mainMBB:
- MIB = BuildMI(mainMBB, DL, TII->get(SP::ORrr))
- .addReg(mainDstReg, RegState::Define)
- .addReg(SP::G0)
- .addReg(SP::G0);
- MIB = BuildMI(mainMBB, DL, TII->get(SP::BCOND)).addMBB(sinkMBB).addImm(SPCC::ICC_A);
-
- mainMBB->addSuccessor(sinkMBB);
-
-
- // restoreMBB:
- MIB = BuildMI(restoreMBB, DL, TII->get(SP::ORri))
- .addReg(restoreDstReg, RegState::Define)
- .addReg(SP::G0)
- .addImm(1);
- //MIB = BuildMI(restoreMBB, DL, TII->get(SP::BCOND)).addMBB(sinkMBB).addImm(SPCC::ICC_A);
- restoreMBB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- MIB = BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(SP::PHI), DstReg)
- .addReg(mainDstReg).addMBB(mainMBB)
- .addReg(restoreDstReg).addMBB(restoreMBB);
-
- MI.eraseFromParent();
- return sinkMBB;
-}
-
//===----------------------------------------------------------------------===//
// Sparc Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -3494,23 +3261,23 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
else
return std::make_pair(0U, &SP::IntRegsRegClass);
case 'f':
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &SP::FPRegsRegClass);
- else if (VT == MVT::f64)
+ else if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &SP::LowDFPRegsRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SP::LowQFPRegsRegClass);
- llvm_unreachable("Unknown ValueType for f-register-type!");
- break;
+ // This will generate an error message
+ return std::make_pair(0U, nullptr);
case 'e':
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &SP::FPRegsRegClass);
- else if (VT == MVT::f64)
+ else if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &SP::DFPRegsRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SP::QFPRegsRegClass);
- llvm_unreachable("Unknown ValueType for e-register-type!");
- break;
+ // This will generate an error message
+ return std::make_pair(0U, nullptr);
}
} else if (!Constraint.empty() && Constraint.size() <= 5
&& Constraint[0] == '{' && *(Constraint.end()-1) == '}') {
@@ -3587,7 +3354,16 @@ void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
getLibcallName(libCall),
1));
return;
-
+ case ISD::READCYCLECOUNTER: {
+ assert(Subtarget->hasLeonCycleCounter());
+ SDValue Lo = DAG.getCopyFromReg(N->getOperand(0), dl, SP::ASR23, MVT::i32);
+ SDValue Hi = DAG.getCopyFromReg(Lo, dl, SP::G0, MVT::i32);
+ SDValue Ops[] = { Lo, Hi };
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops);
+ Results.push_back(Pair);
+ Results.push_back(N->getOperand(0));
+ return;
+ }
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
// Custom lower only if it involves f128 or i64.
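
For reference, the bitcastConstantFPToInt hook added above simply splits the f64 constant's payload into its high and low 32-bit words (swapping them on little-endian layouts) before building the v2i32 vector. A standalone C++ sketch of that split, independent of the LLVM APIs and using 1.0 as an arbitrary example value:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double D = 1.0;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));            // bitcast f64 -> i64
  uint32_t Lo = static_cast<uint32_t>(Bits);       // low word
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32); // high word
  // For 1.0 this prints hi=0x3ff00000 lo=0x00000000, the two i32 elements
  // that end up in the BUILD_VECTOR.
  std::printf("hi=0x%08x lo=0x%08x\n", Hi, Lo);
  return 0;
}
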
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index 0cbbda787881..718851db25bf 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -33,9 +33,6 @@ namespace llvm {
SELECT_XCC, // Select between two values using the current XCC flags.
SELECT_FCC, // Select between two values using the current FCC flags.
- EH_SJLJ_SETJMP, // builtin setjmp operation
- EH_SJLJ_LONGJMP, // builtin longjmp operation
-
Hi, Lo, // Hi/Lo operations, typically on a global address.
FTOI, // FP to Int within a FP register.
@@ -171,12 +168,6 @@ namespace llvm {
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const ;
- SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const ;
-
- unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const;
@@ -192,6 +183,13 @@ namespace llvm {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue PerformBITCASTCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue bitcastConstantFPToInt(ConstantFPSDNode *C, const SDLoc &DL,
+ SelectionDAG &DAG) const;
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
@@ -213,10 +211,6 @@ namespace llvm {
MachineBasicBlock *expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
- MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const;
- MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index f6518c936ebc..0b94c6b614eb 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -239,7 +239,7 @@ let Predicates = [Is64Bit] in {
let DecoderMethod = "DecodeLoadInt" in
defm LDX : Load<"ldx", 0b001011, load, I64Regs, i64>;
-let mayLoad = 1, isCodeGenOnly = 1, isAsmParserOnly = 1 in
+let mayLoad = 1, isAsmParserOnly = 1 in
def TLS_LDXrr : F3_1<3, 0b001011,
(outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym),
"ldx [$addr], $dst, $sym",
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
index 352090ed92c1..35987390d7ba 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -470,10 +470,15 @@ def : InstAlias<"wr $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>;
def : InstAlias<"wr $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>;
def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>;
+def : InstAlias<"pwr $rs2, %psr", (PWRPSRrr G0, IntRegs:$rs2), 0>;
+def : InstAlias<"pwr $simm13, %psr", (PWRPSRri G0, i32imm:$simm13), 0>;
// flush -> flush %g0
def : InstAlias<"flush", (FLUSH), 0>;
+// unimp -> unimp 0
+def : InstAlias<"unimp", (UNIMP 0), 0>;
+
def : MnemonicAlias<"iflush", "flush">;
def : MnemonicAlias<"stub", "stb">;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5b7fb3c485e8..558b37aeebcb 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -56,6 +56,11 @@ def HasHardQuad : Predicate<"Subtarget->hasHardQuad()">;
// instruction
def HasLeonCASA : Predicate<"Subtarget->hasLeonCasa()">;
+// HasPWRPSR - This is true when the target processor supports partial
+// writes to the PSR register that only affect the ET field.
+def HasPWRPSR : Predicate<"Subtarget->hasPWRPSR()">,
+ AssemblerPredicate<"FeaturePWRPSR">;
+
// HasUMAC_SMAC - This is true when the target processor supports the
// UMAC and SMAC instructions
def HasUMAC_SMAC : Predicate<"Subtarget->hasUmacSmac()">;
@@ -89,10 +94,22 @@ def HI22 : SDNodeXForm<imm, [{
MVT::i32);
}]>;
+// Return the complement of a HI22 immediate value.
+def HI22_not : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(unsigned)N->getZExtValue() >> 10, SDLoc(N),
+ MVT::i32);
+}]>;
+
def SETHIimm : PatLeaf<(imm), [{
return isShiftedUInt<22, 10>(N->getZExtValue());
}], HI22>;
+// The N->hasOneUse() prevents the immediate from being instantiated in both
+// normal and complement form.
+def SETHIimm_not : PatLeaf<(i32 imm), [{
+ return N->hasOneUse() && isShiftedUInt<22, 10>(~(unsigned)N->getZExtValue());
+}], HI22_not>;
+
// Addressing modes.
def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>;
def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>;
@@ -121,6 +138,16 @@ def MEMri : Operand<iPTR> {
def TLSSym : Operand<iPTR>;
+def SparcMembarTagAsmOperand : AsmOperandClass {
+ let Name = "MembarTag";
+ let ParserMethod = "parseMembarTag";
+}
+
+def MembarTag : Operand<i32> {
+ let PrintMethod = "printMembarTag";
+ let ParserMatchClass = SparcMembarTagAsmOperand;
+}
+
// Branch targets have OtherVT type.
def brtarget : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
@@ -169,9 +196,6 @@ SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDTSPtlsld :
SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
-def SDTSPeh_sjlj_setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
-def SDTSPeh_sjlj_longjmp: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-
def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>;
def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
@@ -190,13 +214,6 @@ def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>;
def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
-def SPsjlj_setjmp: SDNode<"SPISD::EH_SJLJ_SETJMP",
- SDTSPeh_sjlj_setjmp,
- [SDNPHasChain, SDNPSideEffect]>;
-def SPsjlj_longjmp: SDNode<"SPISD::EH_SJLJ_LONGJMP",
- SDTSPeh_sjlj_longjmp,
- [SDNPHasChain, SDNPSideEffect]>;
-
// These are target-independent nodes, but have target-specific formats.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -473,27 +490,6 @@ let usesCustomInserter = 1, Uses = [FCC0] in {
[(set f128:$dst, (SPselectfcc f128:$T, f128:$F, imm:$Cond))]>;
}
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
- let Defs = [WIM] in
- def EH_SJLJ_SETJMP32ri : Pseudo<(outs IntRegs:$dst), (ins MEMri:$buf),
- "#EH_SJLJ_SETJMP32",
- [(set i32:$dst, (SPsjlj_setjmp ADDRri:$buf))]>,
- Requires<[Is32Bit]>;
- def EH_SJLJ_SETJMP32rr : Pseudo<(outs IntRegs:$dst), (ins MEMrr:$buf),
- "#EH_SJLJ_SETJMP32",
- [(set i32:$dst, (SPsjlj_setjmp ADDRrr:$buf))]>,
- Requires<[Is32Bit]>;
- let isTerminator = 1 in
- def EH_SJLJ_LONGJMP32ri : Pseudo<(outs), (ins MEMri:$buf),
- "#EH_SJLJ_LONGJMP32",
- [(SPsjlj_longjmp ADDRri:$buf)]>,
- Requires<[Is32Bit]>;
- def EH_SJLJ_LONGJMP32rr : Pseudo<(outs), (ins MEMrr:$buf),
- "#EH_SJLJ_LONGJMP32",
- [(SPsjlj_longjmp ADDRrr:$buf)]>,
- Requires<[Is32Bit]>;
-}
-
// Section B.1 - Load Integer Instructions, p. 90
let DecoderMethod = "DecodeLoadInt" in {
defm LDSB : LoadA<"ldsb", 0b001001, 0b011001, sextloadi8, IntRegs, i32>;
@@ -680,6 +676,12 @@ def XNORri : F3_2<2, 0b000111,
(outs IntRegs:$rd), (ins IntRegs:$rs1, simm13Op:$simm13),
"xnor $rs1, $simm13, $rd", []>;
+def : Pat<(and IntRegs:$rs1, SETHIimm_not:$rs2),
+ (ANDNrr i32:$rs1, (SETHIi SETHIimm_not:$rs2))>;
+
+def : Pat<(or IntRegs:$rs1, SETHIimm_not:$rs2),
+ (ORNrr i32:$rs1, (SETHIi SETHIimm_not:$rs2))>;
+
let Defs = [ICC] in {
defm ANDCC : F3_12np<"andcc", 0b010001>;
defm ANDNCC : F3_12np<"andncc", 0b010101>;
@@ -1316,7 +1318,7 @@ let Defs = [FCC0], rd = 0, isCodeGenOnly = 1 in {
//===----------------------------------------------------------------------===//
// Instructions for Thread Local Storage(TLS).
//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1, isAsmParserOnly = 1 in {
+let isAsmParserOnly = 1 in {
def TLS_ADDrr : F3_1<2, 0b000000,
(outs IntRegs:$rd),
(ins IntRegs:$rs1, IntRegs:$rs2, TLSSym:$sym),
@@ -1511,7 +1513,7 @@ def : Pat<(ctpop i32:$src),
(POPCrr (SRLri $src, 0))>;
let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in
- def MEMBARi : F3_2<2, 0b101000, (outs), (ins simm13Op:$simm13),
+ def MEMBARi : F3_2<2, 0b101000, (outs), (ins MembarTag:$simm13),
"membar $simm13", []>;
// The CAS instruction, unlike other instructions, only comes in a
@@ -1569,6 +1571,17 @@ let Predicates = [HasUMAC_SMAC], Defs = [Y, ASR18], Uses = [Y, ASR18] in {
[], IIC_smac_umac>;
}
+// The partial write WRPSR instruction has a non-zero destination
+// register value to separate it from the standard instruction.
+let Predicates = [HasPWRPSR], Defs = [PSR], rd=1 in {
+ def PWRPSRrr : F3_1<2, 0b110001,
+ (outs), (ins IntRegs:$rs1, IntRegs:$rs2),
+ "pwr $rs1, $rs2, %psr", []>;
+ def PWRPSRri : F3_2<2, 0b110001,
+ (outs), (ins IntRegs:$rs1, simm13Op:$simm13),
+ "pwr $rs1, $simm13, %psr", []>;
+}
+
let Defs = [ICC] in {
defm TADDCC : F3_12np<"taddcc", 0b100000>;
defm TSUBCC : F3_12np<"tsubcc", 0b100001>;
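
The SETHIimm_not patterns added above handle a 32-bit mask whose complement has its low ten bits clear: such a mask cannot be materialized by a single sethi, but its complement can, so "and"/"or" become a sethi of the complement followed by andn/orn. A small self-checking C++ sketch of that identity (sethi/andn/orn below are local stand-ins for the SPARC instruction semantics, not LLVM code):

#include <cassert>
#include <cstdint>

static uint32_t sethi(uint32_t Imm22) { return Imm22 << 10; } // sets bits 31:10
static uint32_t andn(uint32_t A, uint32_t B) { return A & ~B; }
static uint32_t orn(uint32_t A, uint32_t B) { return A | ~B; }

int main() {
  uint32_t X = 0xdeadbeef;
  uint32_t C = 0xfffff3ff;   // ~C = 0x00000c00, low 10 bits clear -> SETHIimm_not
  uint32_t Hi22 = ~C >> 10;  // what the HI22_not transform computes (3 here)
  assert((X & C) == andn(X, sethi(Hi22)));  // and -> ANDNrr + SETHIi
  assert((X | C) == orn(X, sethi(Hi22)));   // or  -> ORNrr  + SETHIi
  return 0;
}
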
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index b9647eaa3d51..33caa66154ff 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -95,6 +95,10 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
+ // Reserve ASR1-ASR31
+ for (unsigned n = 0; n < 31; n++)
+ Reserved.set(SP::ASR1 + n);
+
return Reserved;
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 2a279dad5ae2..8dd2569d10de 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -35,8 +35,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index 40c5683f8495..5301fc30a006 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -44,9 +44,11 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
// Leon features
HasLeonCasa = false;
HasUmacSmac = false;
+ HasPWRPSR = false;
InsertNOPLoad = false;
FixAllFDIVSQRT = false;
DetectRoundChange = false;
+ HasLeonCycleCounter = false;
// Determine default and user specified characteristics
std::string CPUName = CPU;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
index 588a6765bcdf..24ea41a266e7 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -47,9 +47,11 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
// LEON features
bool HasUmacSmac;
bool HasLeonCasa;
+ bool HasPWRPSR;
bool InsertNOPLoad;
bool FixAllFDIVSQRT;
bool DetectRoundChange;
+ bool HasLeonCycleCounter;
SparcInstrInfo InstrInfo;
SparcTargetLowering TLInfo;
@@ -92,9 +94,11 @@ public:
// Leon options
bool hasUmacSmac() const { return HasUmacSmac; }
bool hasLeonCasa() const { return HasLeonCasa; }
+ bool hasPWRPSR() const { return HasPWRPSR; }
bool insertNOPLoad() const { return InsertNOPLoad; }
bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; }
bool detectRoundChange() const { return DetectRoundChange; }
+ bool hasLeonCycleCounter() const { return HasLeonCycleCounter; }
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 07f9e7250bd9..5b467235f809 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -70,11 +70,16 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
// pic32 PIC_ Medium GOT < 2^32 bytes
//
// All code models require that the text segment is smaller than 2GB.
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- Reloc::Model RM, bool Is64Bit,
- bool JIT) {
- if (CM)
+static CodeModel::Model
+getEffectiveSparcCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM,
+ bool Is64Bit, bool JIT) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
return *CM;
+ }
if (Is64Bit) {
if (JIT)
return CodeModel::Large;
@@ -88,11 +93,11 @@ SparcTargetMachine::SparcTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Optional<Reloc::Model> RM,
Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT, bool is64bit)
- : LLVMTargetMachine(
- T, computeDataLayout(TT, is64bit), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), is64bit, JIT),
- OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, is64bit), TT, CPU, FS, Options,
+ getEffectiveRelocModel(RM),
+ getEffectiveSparcCodeModel(
+ CM, getEffectiveRelocModel(RM), is64bit, JIT),
+ OL),
TLOF(make_unique<SparcELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this, is64bit), is64Bit(is64bit) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
index b0d76abeba7d..d1eb1d329a4c 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -40,10 +40,6 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
-
- bool isMachineVerifierClean() const override {
- return false;
- }
};
/// Sparc 32-bit target machine
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index bde067d6c129..91959b4151b3 100644
--- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "InstPrinter/SystemZInstPrinter.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -243,6 +244,11 @@ public:
return Kind == KindImmTLS;
}
+ const ImmTLSOp getImmTLS() const {
+ assert(Kind == KindImmTLS && "Not a TLS immediate");
+ return ImmTLS;
+ }
+
// Memory operands.
bool isMem() const override {
return Kind == KindMem;
@@ -270,6 +276,11 @@ public:
return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x100);
}
+ const MemOp& getMem() const {
+ assert(Kind == KindMem && "Not a Mem operand");
+ return Mem;
+ }
+
// Override MCParsedAsmOperand.
SMLoc getStartLoc() const override { return StartLoc; }
SMLoc getEndLoc() const override { return EndLoc; }
@@ -623,8 +634,61 @@ static struct InsnMatchEntry InsnMatchTable[] = {
{ MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } }
};
+static void printMCExpr(const MCExpr *E, raw_ostream &OS) {
+ if (!E)
+ return;
+ if (auto *CE = dyn_cast<MCConstantExpr>(E))
+ OS << *CE;
+ else if (auto *UE = dyn_cast<MCUnaryExpr>(E))
+ OS << *UE;
+ else if (auto *BE = dyn_cast<MCBinaryExpr>(E))
+ OS << *BE;
+ else if (auto *SRE = dyn_cast<MCSymbolRefExpr>(E))
+ OS << *SRE;
+ else
+ OS << *E;
+}
+
void SystemZOperand::print(raw_ostream &OS) const {
- llvm_unreachable("Not implemented");
+ switch (Kind) {
+ case KindToken:
+ OS << "Token:" << getToken();
+ break;
+ case KindReg:
+ OS << "Reg:" << SystemZInstPrinter::getRegisterName(getReg());
+ break;
+ case KindImm:
+ OS << "Imm:";
+ printMCExpr(getImm(), OS);
+ break;
+ case KindImmTLS:
+ OS << "ImmTLS:";
+ printMCExpr(getImmTLS().Imm, OS);
+ if (getImmTLS().Sym) {
+ OS << ", ";
+ printMCExpr(getImmTLS().Sym, OS);
+ }
+ break;
+ case KindMem: {
+ const MemOp &Op = getMem();
+ OS << "Mem:" << *cast<MCConstantExpr>(Op.Disp);
+ if (Op.Base) {
+ OS << "(";
+ if (Op.MemKind == BDLMem)
+ OS << *cast<MCConstantExpr>(Op.Length.Imm) << ",";
+ else if (Op.MemKind == BDRMem)
+ OS << SystemZInstPrinter::getRegisterName(Op.Length.Reg) << ",";
+ if (Op.Index)
+ OS << SystemZInstPrinter::getRegisterName(Op.Index) << ",";
+ OS << SystemZInstPrinter::getRegisterName(Op.Base);
+ OS << ")";
+ }
+ break;
+ }
+ case KindInvalid:
+ break;
+ }
}
// Parse one register of the form %<prefix><number>.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index bd99fabb48c9..e2de721be568 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -647,7 +647,7 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
}
// Force static initialization.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 9edd1fc36406..668a77ac014f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -294,11 +294,10 @@ bool SystemZElimCompare::convertToLoadAndTest(
return false;
// Rebuild to get the CC operand in the right place.
- MachineInstr *BuiltMI =
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode));
+ auto MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode));
for (const auto &MO : MI.operands())
- BuiltMI->addOperand(MO);
- BuiltMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.add(MO);
+ MIB.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index b9e5788cf018..8726b56bc94f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -49,14 +49,14 @@ getNumDecoderSlots(SUnit *SU) const {
if (!SC->isValid())
return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
- if (SC->BeginGroup) {
- if (!SC->EndGroup)
- return 2; // Cracked instruction
- else
- return 3; // Expanded/group-alone instruction
- }
-
- return 1; // Normal instruction
+ assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
+ "Only cracked instruction can have 2 uops.");
+ assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
+ "Expanded instructions always group alone.");
+ assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
+ "Expanded instructions fill the group(s).");
+
+ return SC->NumMicroOps;
}
unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
@@ -139,16 +139,21 @@ void SystemZHazardRecognizer::nextGroup() {
LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
LLVM_DEBUG(CurGroupDbg = "";);
- GrpCount++;
+ int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
+ assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
+ "Current decoder group bad.");
// Reset counter for next group.
CurrGroupSize = 0;
CurrGroupHas4RegOps = false;
- // Decrease counters for execution units by one.
+ GrpCount += ((unsigned) NumGroups);
+
+ // Decrease counters for execution units.
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
- if (ProcResourceCounters[i] > 0)
- ProcResourceCounters[i]--;
+ ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
+ ? (ProcResourceCounters[i] - NumGroups)
+ : 0);
// Clear CriticalResourceIdx if it is now below the threshold.
if (CriticalResourceIdx != UINT_MAX &&
@@ -323,13 +328,13 @@ EmitInstruction(SUnit *SU) {
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
- unsigned GroupLim =
- ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
- assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
+ unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
+ assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
+ && "SU does not fit into decoder group!");
// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
- if (CurrGroupSize == GroupLim || SC->EndGroup)
+ if (CurrGroupSize >= GroupLim || SC->EndGroup)
nextGroup();
}
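
With the hazard-recognizer changes above, getNumDecoderSlots returns the scheduling class's micro-op count, so an expanded instruction occupies one or more whole decoder groups; nextGroup then retires CurrGroupSize/3 groups at once and decrements the processor-resource counters by that group count instead of by one. A small C++ sketch of that arithmetic, with made-up counter values:

#include <cassert>
#include <cstdio>
#include <vector>

int main() {
  unsigned CurrGroupSize = 6;  // e.g. an expanded instruction of 6 uops
  assert(CurrGroupSize <= 3 || CurrGroupSize % 3 == 0);
  int NumGroups = (CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1;  // 2 groups

  unsigned GrpCount = 10;            // made-up running group total
  GrpCount += (unsigned)NumGroups;   // -> 12

  std::vector<int> ProcResourceCounters = {3, 1, 0};  // made-up counters
  for (int &Cnt : ProcResourceCounters)
    Cnt = (Cnt > NumGroups) ? (Cnt - NumGroups) : 0;  // -> 1, 0, 0

  std::printf("GrpCount=%u counters=%d %d %d\n", GrpCount,
              ProcResourceCounters[0], ProcResourceCounters[1],
              ProcResourceCounters[2]);
  return 0;
}
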
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 5425f1d16e5e..5bc2ab0ef2d8 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -71,19 +71,19 @@ struct SystemZAddressingMode {
// True if the address can (and must) include ADJDYNALLOC.
bool isDynAlloc() { return Form == FormBDXDynAlloc; }
- void dump() {
+ void dump(const llvm::SelectionDAG *DAG) {
errs() << "SystemZAddressingMode " << this << '\n';
errs() << " Base ";
if (Base.getNode())
- Base.getNode()->dump();
+ Base.getNode()->dump(DAG);
else
errs() << "null\n";
if (hasIndexField()) {
errs() << " Index ";
if (Index.getNode())
- Index.getNode()->dump();
+ Index.getNode()->dump(DAG);
else
errs() << "null\n";
}
@@ -589,7 +589,7 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
if (AM.isDynAlloc() && !AM.IncludesDynAlloc)
return false;
- LLVM_DEBUG(AM.dump());
+ LLVM_DEBUG(AM.dump(CurDAG));
return true;
}
@@ -728,8 +728,7 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
// The inner check covers all cases but is more expensive.
uint64_t Used = allOnes(Op.getValueSizeInBits());
if (Used != (AndMask | InsertMask)) {
- KnownBits Known;
- CurDAG->computeKnownBits(Op.getOperand(0), Known);
+ KnownBits Known = CurDAG->computeKnownBits(Op.getOperand(0));
if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue()))
return false;
}
@@ -787,8 +786,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
// If some bits of Input are already known zeros, those bits will have
// been removed from the mask. See if adding them back in makes the
// mask suitable.
- KnownBits Known;
- CurDAG->computeKnownBits(Input, Known);
+ KnownBits Known = CurDAG->computeKnownBits(Input);
Mask |= Known.Zero.getZExtValue();
if (!refineRxSBGMask(RxSBG, Mask))
return false;
@@ -811,8 +809,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
// If some bits of Input are already known ones, those bits will have
// been removed from the mask. See if adding them back in makes the
// mask suitable.
- KnownBits Known;
- CurDAG->computeKnownBits(Input, Known);
+ KnownBits Known = CurDAG->computeKnownBits(Input);
Mask &= ~Known.One.getZExtValue();
if (!refineRxSBGMask(RxSBG, Mask))
return false;
@@ -1147,7 +1144,7 @@ bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
return false;
auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
- if (!Load || !Load->hasOneUse())
+ if (!Load || !Load->hasNUsesOfValue(1, 0))
return false;
if (Load->getMemoryVT().getSizeInBits() !=
Load->getValueType(0).getSizeInBits())
@@ -1308,7 +1305,7 @@ bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
return false;
case SystemZISD::SSUBO:
NegateOperand = true;
- /* fall through */
+ LLVM_FALLTHROUGH;
case SystemZISD::SADDO:
if (MemVT == MVT::i32)
NewOpc = SystemZ::ASI;
@@ -1319,7 +1316,7 @@ bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
break;
case SystemZISD::USUBO:
NegateOperand = true;
- /* fall through */
+ LLVM_FALLTHROUGH;
case SystemZISD::UADDO:
if (MemVT == MVT::i32)
NewOpc = SystemZ::ALSI;
@@ -1354,11 +1351,8 @@ bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
SDValue Ops[] = { Base, Disp, Operand, InputChain };
MachineSDNode *Result =
CurDAG->getMachineNode(NewOpc, DL, MVT::i32, MVT::Other, Ops);
-
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
- MemOp[0] = StoreNode->getMemOperand();
- MemOp[1] = LoadNode->getMemOperand();
- Result->setMemRefs(MemOp, MemOp + 2);
+ CurDAG->setNodeMemRefs(
+ Result, {StoreNode->getMemOperand(), LoadNode->getMemOperand()});
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e76fa71dacd7..2a825c1316f3 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -452,29 +452,29 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f64, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f128, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f128, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
}
// We have fused multiply-addition for f32 and f64 but not f128.
@@ -523,10 +523,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+ setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
+ setTargetDAGCombine(ISD::SDIV);
+ setTargetDAGCombine(ISD::UDIV);
+ setTargetDAGCombine(ISD::SREM);
+ setTargetDAGCombine(ISD::UREM);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -2213,8 +2219,7 @@ static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
if (!Mask)
return;
- KnownBits Known;
- DAG.computeKnownBits(C.Op0.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
return;
@@ -2912,12 +2917,12 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
DAG.getConstant(32, DL, MVT::i64));
}
SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
- return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
DL, MVT::f32, Out64);
}
if (InVT == MVT::f32 && ResVT == MVT::i32) {
SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
- SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
MVT::f64, SDValue(U64, 0), In);
SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
if (Subtarget.hasHighWord())
@@ -3160,10 +3165,9 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
// Get the known-zero masks for each operand.
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
- KnownBits Known[2];
- DAG.computeKnownBits(Ops[0], Known[0]);
- DAG.computeKnownBits(Ops[1], Known[1]);
+ SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
+ KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
+ DAG.computeKnownBits(Ops[1])};
// See if the upper 32 bits of one operand and the lower 32 bits of the
// other are known zero. They are the low and high operands respectively.
@@ -3346,8 +3350,7 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
}
// Get the known-zero mask for the operand.
- KnownBits Known;
- DAG.computeKnownBits(Op, Known);
+ KnownBits Known = DAG.computeKnownBits(Op);
unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
if (NumSignificantBits == 0)
return DAG.getConstant(0, DL, VT);
@@ -4475,6 +4478,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// Constants with undefs to get a full vector constant and use that
// as the starting point.
SDValue Result;
+ SDValue ReplicatedVal;
if (NumConstants > 0) {
for (unsigned I = 0; I < NumElements; ++I)
if (!Constants[I].getNode())
@@ -4485,17 +4489,21 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// avoid a false dependency on any previous contents of the vector
// register.
- // Use a VLREP if at least one element is a load.
- unsigned LoadElIdx = UINT_MAX;
+ // Use a VLREP if at least one element is a load. Make sure to replicate
+ // the load with the most elements having its value.
+ std::map<const SDNode*, unsigned> UseCounts;
+ SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
if (Elems[I].getOpcode() == ISD::LOAD &&
cast<LoadSDNode>(Elems[I])->isUnindexed()) {
- LoadElIdx = I;
- break;
+ SDNode *Ld = Elems[I].getNode();
+ UseCounts[Ld]++;
+ if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
+ LoadMaxUses = Ld;
}
- if (LoadElIdx != UINT_MAX) {
- Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]);
- Done[LoadElIdx] = true;
+ if (LoadMaxUses != nullptr) {
+ ReplicatedVal = SDValue(LoadMaxUses, 0);
+ Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
} else {
// Try to use VLVGP.
unsigned I1 = NumElements / 2 - 1;
@@ -4516,7 +4524,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// Use VLVGx to insert the other elements.
for (unsigned I = 0; I < NumElements; ++I)
- if (!Done[I] && !Elems[I].isUndef())
+ if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
DAG.getConstant(I, DL, MVT::i32));
return Result;
@@ -5359,6 +5367,46 @@ SDValue SystemZTargetLowering::combineMERGE(
return SDValue();
}
+SDValue SystemZTargetLowering::combineLOAD(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT LdVT = N->getValueType(0);
+ if (LdVT.isVector() || LdVT.isInteger())
+ return SDValue();
+ // Transform a scalar load that is REPLICATEd as well as having other
+ // use(s) to the form where the other use(s) use the first element of the
+ // REPLICATE instead of the load. Otherwise instruction selection will not
+ // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
+ // point loads.
+
+ SDValue Replicate;
+ SmallVector<SDNode*, 8> OtherUses;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == SystemZISD::REPLICATE) {
+ if (Replicate)
+ return SDValue(); // Should never happen
+ Replicate = SDValue(*UI, 0);
+ }
+ else if (UI.getUse().getResNo() == 0)
+ OtherUses.push_back(*UI);
+ }
+ if (!Replicate || OtherUses.empty())
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
+ Replicate, DAG.getConstant(0, DL, MVT::i32));
+ // Update uses of the loaded Value while preserving old chains.
+ for (SDNode *U : OtherUses) {
+ SmallVector<SDValue, 8> Ops;
+ for (SDValue Op : U->ops())
+ Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
+ DAG.UpdateNodeOperands(U, Ops);
+ }
+ return SDValue(N, 0);
+}
+
SDValue SystemZTargetLowering::combineSTORE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -5394,8 +5442,7 @@ SDValue SystemZTargetLowering::combineSTORE(
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
SDValue Ops[] = {
- N->getOperand(0), BSwapOp, N->getOperand(2),
- DAG.getValueType(Op1.getValueType())
+ N->getOperand(0), BSwapOp, N->getOperand(2)
};
return
@@ -5436,7 +5483,7 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
// (fpround (extract_vector_elt X 0))
// (fpround (extract_vector_elt X 1)) ->
// (extract_vector_elt (VROUND X) 0)
- // (extract_vector_elt (VROUND X) 1)
+ // (extract_vector_elt (VROUND X) 2)
//
// This is a special case since the target doesn't really support v2f32s.
SelectionDAG &DAG = DCI.DAG;
@@ -5478,6 +5525,53 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
return SDValue();
}
+SDValue SystemZTargetLowering::combineFP_EXTEND(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ // (fpextend (extract_vector_elt X 0))
+ // (fpextend (extract_vector_elt X 2)) ->
+ // (extract_vector_elt (VEXTEND X) 0)
+ // (extract_vector_elt (VEXTEND X) 1)
+ //
+ // This is a special case since the target doesn't really support v2f32s.
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f64 &&
+ Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getValueType() == MVT::v4f32 &&
+ Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ SDValue Vec = Op0.getOperand(0);
+ for (auto *U : Vec->uses()) {
+ if (U != Op0.getNode() &&
+ U->hasOneUse() &&
+ U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ U->getOperand(0) == Vec &&
+ U->getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
+ SDValue OtherExtend = SDValue(*U->use_begin(), 0);
+ if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
+ OtherExtend.getOperand(0) == SDValue(U, 0) &&
+ OtherExtend.getValueType() == MVT::f64) {
+ SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
+ MVT::v2f64, Vec);
+ DCI.AddToWorklist(VExtend.getNode());
+ SDValue Extract1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
+ VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
+ DCI.AddToWorklist(Extract1.getNode());
+ DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
+ SDValue Extract0 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
+ VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ return Extract0;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -5492,13 +5586,14 @@ SDValue SystemZTargetLowering::combineBSWAP(
// Create the byte-swapping load.
SDValue Ops[] = {
LD->getChain(), // Chain
- LD->getBasePtr(), // Ptr
- DAG.getValueType(N->getValueType(0)) // VT
+ LD->getBasePtr() // Ptr
};
+ EVT LoadVT = N->getValueType(0);
+ if (LoadVT == MVT::i16)
+ LoadVT = MVT::i32;
SDValue BSLoad =
DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
- DAG.getVTList(N->getValueType(0) == MVT::i64 ?
- MVT::i64 : MVT::i32, MVT::Other),
+ DAG.getVTList(LoadVT, MVT::Other),
Ops, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
@@ -5664,6 +5759,23 @@ SDValue SystemZTargetLowering::combineGET_CCMASK(
return Select->getOperand(4);
}
+SDValue SystemZTargetLowering::combineIntDIVREM(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // In the case where the divisor is a vector of constants a cheaper
+ // sequence of instructions can replace the divide. BuildSDIV is called to
+ // do this during DAG combining, but it only succeeds when it can build a
+ // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
+ // since it is not Legal but Custom it can only happen before
+ // legalization. Therefore we must scalarize this early before Combine
+ // 1. For widened vectors, this is already the result of type legalization.
+ if (VT.isVector() && isTypeLegal(VT) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
+ return DAG.UnrollVectorOp(N);
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch(N->getOpcode()) {
@@ -5673,14 +5785,20 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
case SystemZISD::MERGE_HIGH:
case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
+ case ISD::LOAD: return combineLOAD(N, DCI);
case ISD::STORE: return combineSTORE(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
+ case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: return combineIntDIVREM(N, DCI);
}
return SDValue();
@@ -5791,10 +5909,10 @@ static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
unsigned OpNo) {
APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
- unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
- KnownBits LHSKnown(SrcBitWidth), RHSKnown(SrcBitWidth);
- DAG.computeKnownBits(Op.getOperand(OpNo), LHSKnown, Src0DemE, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(OpNo + 1), RHSKnown, Src1DemE, Depth + 1);
+ KnownBits LHSKnown =
+ DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
+ KnownBits RHSKnown =
+ DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
Known.One = LHSKnown.One & RHSKnown.One;
}
@@ -5860,9 +5978,8 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::s390_vuplf: {
SDValue SrcOp = Op.getOperand(1);
unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
- Known = KnownBits(SrcBitWidth);
APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
- DAG.computeKnownBits(SrcOp, Known, SrcDemE, Depth + 1);
+ Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
if (IsLogical) {
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(SrcBitWidth);
@@ -5881,7 +5998,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
case SystemZISD::REPLICATE: {
SDValue SrcOp = Op.getOperand(0);
- DAG.computeKnownBits(SrcOp, Known, Depth + 1);
+ Known = DAG.computeKnownBits(SrcOp, Depth + 1);
if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
break;
@@ -6852,7 +6969,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
.addImm(ThisLength)
.add(SrcBase)
.addImm(SrcDisp)
- ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .setMemRefs(MI.memoperands());
DestDisp += ThisLength;
SrcDisp += ThisLength;
Length -= ThisLength;
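
combineIntDIVREM above unrolls a vector division whose divisor is a constant build-vector into scalar divisions, so that the generic per-scalar combines can still replace each divide-by-constant with cheaper code (per the comment, SystemZ only offers SMUL_LOHI as a Custom node, so BuildSDIV cannot handle the vector form once types are legal). A rough scalar-level sketch of what the unrolled form computes, with made-up lane values:

#include <array>
#include <cstdint>
#include <cstdio>

int main() {
  std::array<uint32_t, 4> Num = {100, 200, 300, 400};
  std::array<uint32_t, 4> Den = {3, 5, 7, 9};  // constant divisor vector

  // Unrolled form: one scalar udiv per lane; each divisor is a known
  // constant, so a later combine can rewrite it as a multiply-high + shift.
  std::array<uint32_t, 4> Quot;
  for (unsigned I = 0; I < 4; ++I)
    Quot[I] = Num[I] / Den[I];

  std::printf("%u %u %u %u\n", Quot[0], Quot[1], Quot[2], Quot[3]);  // 33 40 42 44
  return 0;
}
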
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 267e31a85216..622da32e418d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -337,18 +337,8 @@ enum NodeType : unsigned {
// Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
ATOMIC_CMP_SWAP_128,
- // Byte swapping load.
- //
- // Operand 0: the address to load from
- // Operand 1: the type of load (i16, i32, i64)
- LRV,
-
- // Byte swapping store.
- //
- // Operand 0: the value to store
- // Operand 1: the address to store to
- // Operand 2: the type of store (i16, i32, i64)
- STRV,
+ // Byte swapping load/store. Same operands as regular load/store.
+ LRV, STRV,
// Prefetch from the second operand using the 4-bit control code in
// the first operand. The code is 1 for a load prefetch and 2 for
@@ -389,7 +379,7 @@ public:
// want to clobber the upper 32 bits of a GPR unnecessarily.
return MVT::i32;
}
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
// Widen subvectors to the full width rather than promoting integer
// elements. This is better because:
@@ -597,14 +587,17 @@ private:
SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 4e47752ed122..1374ee91fa29 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -97,7 +97,7 @@ let Predicates = [FeatureNoVectorEnhancements1] in
(CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
let Predicates = [FeatureVectorEnhancements1] in
def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))),
- (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>;
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
// fcopysign with an FP64 result.
let isCodeGenOnly = 1 in
@@ -110,7 +110,7 @@ let Predicates = [FeatureNoVectorEnhancements1] in
(CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
let Predicates = [FeatureVectorEnhancements1] in
def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))),
- (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>;
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
// fcopysign with an FP128 result. Use "upper" as the high half and leave
// the low half as-is.
@@ -187,7 +187,7 @@ def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
let Predicates = [FeatureNoVectorEnhancements1] in {
def : Pat<(f32 (fpround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
def : Pat<(f64 (fpround FP128:$src)),
(EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
}
@@ -446,13 +446,13 @@ def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- FP32:$src1, subreg_r32), FP32:$src2)>;
+ FP32:$src1, subreg_h32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
def : Pat<(fmul (f64 (fpextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
- (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index e3f9a9645d13..1e904a86ea79 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2900,7 +2900,7 @@ multiclass UnaryExtraVRRaSPair<string mnemonic, bits<16> opcode,
}
multiclass UnaryExtraVRRaSPairGeneric<string mnemonic, bits<16> opcode> {
- let M4 = 0 in
+ let M4 = 0, Defs = [CC] in
def "" : InstVRRa<opcode, (outs VR128:$V1),
(ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M5),
mnemonic#"\t$V1, $V2, $M3, $M5", []>;
@@ -3472,7 +3472,9 @@ multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,
class BinaryVRRbSPairGeneric<string mnemonic, bits<16> opcode>
: InstVRRb<opcode, (outs VR128:$V1),
(ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
- mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []> {
+ let Defs = [CC];
+}
// Declare a pair of instructions, one which sets CC and one which doesn't.
// The CC-setting form ends with "S" and sets the low bit of M5.
@@ -3496,9 +3498,10 @@ multiclass BinaryExtraVRRbSPair<string mnemonic, bits<16> opcode,
}
multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
- def "" : InstVRRb<opcode, (outs VR128:$V1),
- (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
- mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ let Defs = [CC] in
+ def "" : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
imm32zx4:$M4, 0)>;
@@ -4185,9 +4188,10 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
}
multiclass TernaryOptVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
- def "" : InstVRRb<opcode, (outs VR128:$V1),
- (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
- mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ let Defs = [CC] in
+ def "" : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
imm32zx4:$M4, 0)>;
@@ -4385,7 +4389,8 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
}
multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
- def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
+ let Defs = [CC] in
+ def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
VR128:$V4, imm32zx4:$M5, 0)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index f0f9211efd5d..b03b4edaa4ab 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -880,10 +880,10 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SystemZ::FP128BitRegClass.contains(SrcReg)) {
unsigned SrcRegHi =
RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
unsigned SrcRegLo =
RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg)
.addReg(SrcRegHi, getKillRegState(KillSrc))
@@ -894,10 +894,10 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SystemZ::VR128BitRegClass.contains(SrcReg)) {
unsigned DestRegHi =
RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
unsigned DestRegLo =
RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
if (DestRegHi != SrcReg)
copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index bb5b7aae883b..8d3b1011d0a7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -756,16 +756,15 @@ def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>;
def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>;
def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
-// Byte-swapping loads. Unlike normal loads, these instructions are
-// allowed to access storage more than once.
-def LRVH : UnaryRXY<"lrvh", 0xE31F, z_lrvh, GR32, 2>;
-def LRV : UnaryRXY<"lrv", 0xE31E, z_lrv, GR32, 4>;
-def LRVG : UnaryRXY<"lrvg", 0xE30F, z_lrvg, GR64, 8>;
-
-// Likewise byte-swapping stores.
-def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>;
-def STRV : StoreRXY<"strv", 0xE33E, z_strv, GR32, 4>;
-def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>;
+// Byte-swapping loads.
+def LRVH : UnaryRXY<"lrvh", 0xE31F, z_loadbswap16, GR32, 2>;
+def LRV : UnaryRXY<"lrv", 0xE31E, z_loadbswap32, GR32, 4>;
+def LRVG : UnaryRXY<"lrvg", 0xE30F, z_loadbswap64, GR64, 8>;
+
+// Byte-swapping stores.
+def STRVH : StoreRXY<"strvh", 0xE33F, z_storebswap16, GR32, 2>;
+def STRV : StoreRXY<"strv", 0xE33E, z_storebswap32, GR32, 4>;
+def STRVG : StoreRXY<"strvg", 0xE32F, z_storebswap64, GR64, 8>;
// Byte-swapping memory-to-memory moves.
let mayLoad = 1, mayStore = 1 in
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 92b86575235a..6c97b85277c3 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -151,13 +151,13 @@ let Predicates = [FeatureVector] in {
def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
- def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)),
+ def : Pat<(z_vllezf32 bdxaddr12only:$addr),
(VLLEZF bdxaddr12only:$addr)>;
- def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
+ def : Pat<(z_vllezf64 bdxaddr12only:$addr),
(VLLEZG bdxaddr12only:$addr)>;
let Predicates = [FeatureVectorEnhancements1] in {
def VLLEZLF : UnaryVRX<"vllezlf", 0xE704, z_vllezli32, v128f, 4, 6>;
- def : Pat<(v4f32 (z_vllezlf32 bdxaddr12only:$addr)),
+ def : Pat<(z_vllezlf32 bdxaddr12only:$addr),
(VLLEZLF bdxaddr12only:$addr)>;
}
@@ -1031,7 +1031,7 @@ let Predicates = [FeatureVector] in {
// Maximum.
multiclass VectorMax<Instruction insn, TypedReg tr> {
def : FPMinMax<insn, fmaxnum, tr, 4>;
- def : FPMinMax<insn, fmaxnan, tr, 1>;
+ def : FPMinMax<insn, fmaximum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
@@ -1055,7 +1055,7 @@ let Predicates = [FeatureVector] in {
// Minimum.
multiclass VectorMin<Instruction insn, TypedReg tr> {
def : FPMinMax<insn, fminnum, tr, 4>;
- def : FPMinMax<insn, fminnan, tr, 1>;
+ def : FPMinMax<insn, fminimum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
@@ -1405,8 +1405,8 @@ multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls,
(vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar,
subreg), 0)>;
}
-defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_r32>;
-defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
+defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_h32>;
+defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_h64>;
// Match v2f64 insertions. The AddedComplexity counters the 3 added by
// TableGen for the base register operand in VLVG-based integer insertions
@@ -1414,10 +1414,10 @@ defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
let AddedComplexity = 4 in {
def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0),
(VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
- subreg_r64), VR128:$vec, 1)>;
+ subreg_h64), VR128:$vec, 1)>;
def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1),
(VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
- subreg_r64), 0)>;
+ subreg_h64), 0)>;
}
// We extract floating-point element X by replicating (for elements other
@@ -1426,14 +1426,14 @@ let AddedComplexity = 4 in {
// extractions and ensures that this version is strictly better.
let AddedComplexity = 4 in {
def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)),
- (EXTRACT_SUBREG VR128:$vec, subreg_r32)>;
+ (EXTRACT_SUBREG VR128:$vec, subreg_h32)>;
def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)),
- (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>;
+ (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_h32)>;
def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)),
- (EXTRACT_SUBREG VR128:$vec, subreg_r64)>;
+ (EXTRACT_SUBREG VR128:$vec, subreg_h64)>;
def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)),
- (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>;
+ (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_h64)>;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
index 5103867e2d9a..626675bfb70c 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -127,14 +127,6 @@ def SDT_ZIPM : SDTypeProfile<1, 1,
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
-def SDT_ZLoadBSwap : SDTypeProfile<1, 2,
- [SDTCisInt<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, OtherVT>]>;
-def SDT_ZStoreBSwap : SDTypeProfile<0, 3,
- [SDTCisInt<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, OtherVT>]>;
def SDT_ZTBegin : SDTypeProfile<1, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>,
@@ -283,9 +275,9 @@ def z_subcarry_1 : SDNode<"SystemZISD::SUBCARRY", SDT_ZBinaryWithCarry>;
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
-def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap,
+def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap,
+def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>;
@@ -429,16 +421,28 @@ def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
// Pattern fragments
//===----------------------------------------------------------------------===//
-def z_lrvh : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>;
-def z_lrv : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>;
-def z_lrvg : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>;
+def z_loadbswap16 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_loadbswap32 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_loadbswap64 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
-def z_strvh : PatFrag<(ops node:$src, node:$addr),
- (z_storebswap node:$src, node:$addr, i16)>;
-def z_strv : PatFrag<(ops node:$src, node:$addr),
- (z_storebswap node:$src, node:$addr, i32)>;
-def z_strvg : PatFrag<(ops node:$src, node:$addr),
- (z_storebswap node:$src, node:$addr, i64)>;
+def z_storebswap16 : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_storebswap32 : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_storebswap64 : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
// Fragments including CC as an implicit source.
def z_br_ccmask
@@ -556,7 +560,6 @@ class NonvolatileLoad<SDPatternOperator load>
auto *Load = cast<LoadSDNode>(N);
return !Load->isVolatile();
}]>;
-def nonvolatile_load : NonvolatileLoad<load>;
def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
@@ -567,7 +570,6 @@ class NonvolatileStore<SDPatternOperator store>
auto *Store = cast<StoreSDNode>(N);
return !Store->isVolatile();
}]>;
-def nonvolatile_store : NonvolatileStore<store>;
def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>;
def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>;
def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>;
@@ -743,37 +745,37 @@ class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>;
def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
def z_vllezi32 : z_vllez<i32, load, 1>;
-def z_vllezi64 : PatFrag<(ops node:$addr),
- (z_join_dwords (i64 (load node:$addr)), (i64 0))>;
+def z_vllezi64 : PatFrags<(ops node:$addr),
+ [(z_vector_insert (z_vzero),
+ (i64 (load node:$addr)), (i32 0)),
+ (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
// We use high merges to form a v4f32 from four f32s. Propagating zero
// into all elements but index 1 gives this expression.
def z_vllezf32 : PatFrag<(ops node:$addr),
- (bitconvert
- (z_merge_high
- (v2i64
- (z_unpackl_high
- (v4i32
- (bitconvert
- (v4f32 (scalar_to_vector
- (f32 (load node:$addr)))))))),
- (v2i64 (z_vzero))))>;
+ (z_merge_high
+ (v2i64
+ (z_unpackl_high
+ (v4i32
+ (bitconvert
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))))))),
+ (v2i64 (z_vzero)))>;
def z_vllezf64 : PatFrag<(ops node:$addr),
(z_merge_high
- (scalar_to_vector (f64 (load node:$addr))),
+ (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
(z_vzero))>;
// Similarly for the high element of a zeroed vector.
def z_vllezli32 : z_vllez<i32, load, 0>;
def z_vllezlf32 : PatFrag<(ops node:$addr),
- (bitconvert
- (z_merge_high
- (v2i64
- (bitconvert
- (z_merge_high
- (v4f32 (scalar_to_vector
- (f32 (load node:$addr)))),
- (v4f32 (z_vzero))))),
- (v2i64 (z_vzero))))>;
+ (z_merge_high
+ (v2i64
+ (bitconvert
+ (z_merge_high
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))),
+ (v4f32 (z_vzero))))),
+ (v2i64 (z_vzero)))>;
// Store one element of a vector.
class z_vste<ValueType scalartype, SDPatternOperator store>
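The PatFrag rewrite above keys each byte-swapping load and store on the node's memory VT (via cast<MemIntrinsicSDNode>(N)->getMemoryVT()) instead of carrying an extra OtherVT operand, so LRVH/LRV/LRVG and STRVH/STRV/STRVG are selected purely by access width. As a rough standalone illustration of that width-keyed dispatch -- not LLVM code; MemWidth, selectByteSwapLoad and the printed mnemonics are assumptions made for the sketch:

// Toy model of selecting a byte-reversed load by access width, mirroring
// how the new z_loadbswap16/32/64 PatFrags key on the node's memory VT.
#include <cstdint>
#include <iostream>
#include <string>

enum class MemWidth : uint8_t { I16 = 16, I32 = 32, I64 = 64 };

// Hypothetical helper; the real mapping lives in the SystemZInstrInfo.td
// patterns shown in the hunk above.
std::string selectByteSwapLoad(MemWidth W) {
  switch (W) {
  case MemWidth::I16: return "LRVH"; // 2-byte byte-reversed load
  case MemWidth::I32: return "LRV";  // 4-byte byte-reversed load
  case MemWidth::I64: return "LRVG"; // 8-byte byte-reversed load
  }
  return "?";
}

int main() {
  std::cout << selectByteSwapLoad(MemWidth::I32) << "\n"; // prints "LRV"
}

In the real patterns the same discrimination is done inside each PatFrag predicate, which is why the SDT_ZLoadBSwap/SDT_ZStoreBSwap profiles could be dropped in favor of the generic SDTLoad/SDTStore profiles.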
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 76ed6f80ba55..e9f9188048da 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -63,6 +63,10 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
+ VirtReg, Order, Hints, MF, VRM, Matrix);
+
if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) {
SmallVector<unsigned, 8> Worklist;
SmallSet<unsigned, 4> DoneRegs;
@@ -84,8 +88,18 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
getRC32(TrueMO, VRM, MRI));
if (RC && RC != &SystemZ::GRX32BitRegClass) {
+ // Pass the registers of RC as hints while making sure that if
+ // any of these registers are copy hints, hint them first.
+ SmallSet<unsigned, 4> CopyHints;
+ CopyHints.insert(Hints.begin(), Hints.end());
+ Hints.clear();
+ for (MCPhysReg Reg : Order)
+ if (CopyHints.count(Reg) &&
+ RC->contains(Reg) && !MRI->isReserved(Reg))
+ Hints.push_back(Reg);
for (MCPhysReg Reg : Order)
- if (RC->contains(Reg) && !MRI->isReserved(Reg))
+ if (!CopyHints.count(Reg) &&
+ RC->contains(Reg) && !MRI->isReserved(Reg))
Hints.push_back(Reg);
// Return true to make these hints the only regs available to
// RA. This may mean extra spilling but since the alternative is
@@ -102,8 +116,7 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
}
- return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
- VRM, Matrix);
+ return BaseImplRetVal;
}
const MCPhysReg *
@@ -270,25 +283,30 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
// Check that the two virtual registers are local to MBB.
MachineBasicBlock *MBB = MI->getParent();
- if (LIS.isLiveInToMBB(IntGR128, MBB) || LIS.isLiveOutOfMBB(IntGR128, MBB) ||
- LIS.isLiveInToMBB(IntGRNar, MBB) || LIS.isLiveOutOfMBB(IntGRNar, MBB))
+ MachineInstr *FirstMI_GR128 =
+ LIS.getInstructionFromIndex(IntGR128.beginIndex());
+ MachineInstr *FirstMI_GRNar =
+ LIS.getInstructionFromIndex(IntGRNar.beginIndex());
+ MachineInstr *LastMI_GR128 = LIS.getInstructionFromIndex(IntGR128.endIndex());
+ MachineInstr *LastMI_GRNar = LIS.getInstructionFromIndex(IntGRNar.endIndex());
+ if ((!FirstMI_GR128 || FirstMI_GR128->getParent() != MBB) ||
+ (!FirstMI_GRNar || FirstMI_GRNar->getParent() != MBB) ||
+ (!LastMI_GR128 || LastMI_GR128->getParent() != MBB) ||
+ (!LastMI_GRNar || LastMI_GRNar->getParent() != MBB))
return false;
- // Find the first and last MIs of the registers.
- MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
+ MachineBasicBlock::iterator MII = nullptr, MEE = nullptr;
if (WideOpNo == 1) {
- FirstMI = LIS.getInstructionFromIndex(IntGR128.beginIndex());
- LastMI = LIS.getInstructionFromIndex(IntGRNar.endIndex());
+ MII = FirstMI_GR128;
+ MEE = LastMI_GRNar;
} else {
- FirstMI = LIS.getInstructionFromIndex(IntGRNar.beginIndex());
- LastMI = LIS.getInstructionFromIndex(IntGR128.endIndex());
+ MII = FirstMI_GRNar;
+ MEE = LastMI_GR128;
}
- assert (FirstMI && LastMI && "No instruction from index?");
// Check if coalescing seems safe by finding the set of clobbered physreg
// pairs in the region.
BitVector PhysClobbered(getNumRegs());
- MachineBasicBlock::iterator MII = FirstMI, MEE = LastMI;
MEE++;
for (; MII != MEE; ++MII) {
for (const MachineOperand &MO : MII->operands())
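The getRegAllocationHints change above keeps copy hints at the front of the hint list: members of the narrower 32-bit class that are already copy hints are pushed first, then the remaining usable members in allocation order. A small self-contained model of that two-pass ordering -- plain C++, not LLVM's MachineRegisterInfo/VirtRegMap API; orderHints and its containers are invented for the sketch:

// Standalone sketch of the copy-hints-first ordering from the patch above.
#include <iostream>
#include <set>
#include <vector>

using Reg = unsigned;

std::vector<Reg> orderHints(const std::vector<Reg> &AllocOrder,
                            const std::set<Reg> &CopyHints,
                            const std::set<Reg> &ClassMembers,
                            const std::set<Reg> &Reserved) {
  std::vector<Reg> Hints;
  auto usable = [&](Reg R) {
    return ClassMembers.count(R) && !Reserved.count(R);
  };
  // First pass: registers that are both copy hints and usable class members.
  for (Reg R : AllocOrder)
    if (CopyHints.count(R) && usable(R))
      Hints.push_back(R);
  // Second pass: the remaining usable class members, in allocation order.
  for (Reg R : AllocOrder)
    if (!CopyHints.count(R) && usable(R))
      Hints.push_back(R);
  return Hints;
}

int main() {
  std::vector<Reg> Order = {1, 2, 3, 4, 5};
  std::set<Reg> CopyHints = {4};
  std::set<Reg> Members = {2, 3, 4};
  std::set<Reg> Reserved = {3};
  for (Reg R : orderHints(Order, CopyHints, Members, Reserved))
    std::cout << R << ' '; // prints "4 2"
  std::cout << '\n';
}

Capturing the base implementation's return value up front (BaseImplRetVal in the patch) and returning it at the end keeps the override transparent whenever the GRX32 special case does not apply.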
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 94781659a50a..9fd2e4ae4f00 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -57,8 +57,6 @@ public:
const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
// Override TargetRegisterInfo.h.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 79ba7534f92c..cea88c088b86 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -25,11 +25,8 @@ def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32.
def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
def subreg_l64 : SubRegIndex<64, 0>;
def subreg_h64 : SubRegIndex<64, 64>;
-def subreg_r32 : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits.
-def subreg_r64 : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits.
def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>;
def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>;
-def subreg_hr32 : ComposedSubRegIndex<subreg_h64, subreg_r32>;
}
// Define a register class that contains values of types TYPES and an
@@ -188,7 +185,7 @@ class FPR32<bits<16> num, string n> : SystemZReg<n> {
class FPR64<bits<16> num, string n, FPR32 high>
: SystemZRegWithSubregs<n, [high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_r32];
+ let SubRegIndices = [subreg_h32];
}
// 8 pairs of FPR64s, with a one-register gap in between.
@@ -231,7 +228,7 @@ defm FP128 : SystemZRegClass<"FP128", [f128], 128,
class VR128<bits<16> num, string n, FPR64 high>
: SystemZRegWithSubregs<n, [high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_r64];
+ let SubRegIndices = [subreg_h64];
}
// Full vector registers.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
index 385a94b5d6a9..83bf97e6841a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
@@ -12,11 +12,13 @@
// These resources are used to express decoder grouping rules. The number of
// decoder slots needed by an instruction is normally one, but there are
// exceptions.
-def NormalGr : SchedWrite;
-def Cracked : SchedWrite;
-def GroupAlone : SchedWrite;
-def BeginGroup : SchedWrite;
-def EndGroup : SchedWrite;
+def NormalGr : SchedWrite;
+def Cracked : SchedWrite;
+def GroupAlone : SchedWrite;
+def GroupAlone2 : SchedWrite;
+def GroupAlone3 : SchedWrite;
+def BeginGroup : SchedWrite;
+def EndGroup : SchedWrite;
// A SchedWrite added to other SchedWrites to make LSU latency parameterizable.
def LSULatency : SchedWrite;
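The new GroupAlone2 and GroupAlone3 SchedWrites introduced above carry 6 and 9 micro-ops versus GroupAlone's 3, which reads as an instruction monopolizing two or three full decoder groups rather than one. A back-of-the-envelope sketch of that reading -- an assumption based only on the 3/6/9 counts in these scheduler files, not on documented machine behavior:

// Toy calculation: how many 3-slot decoder groups an instruction occupies
// on its own, under the assumed 3-slots-per-group model.
#include <iostream>

int groupsOccupied(int NumMicroOps, int SlotsPerGroup = 3) {
  // Round up so partially filled trailing groups still count.
  return (NumMicroOps + SlotsPerGroup - 1) / SlotsPerGroup;
}

int main() {
  std::cout << groupsOccupied(3) << ' '   // GroupAlone  -> 1
            << groupsOccupied(6) << ' '   // GroupAlone2 -> 2
            << groupsOccupied(9) << '\n'; // GroupAlone3 -> 3
}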
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
index 5d32232107af..74e1dad87908 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -48,6 +48,16 @@ def : WriteRes<GroupAlone, []> {
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -131,7 +141,7 @@ def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXa2, FXb2, GroupAlone],
+def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -474,13 +484,13 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXa2, LSU, GroupAlone],
//===----------------------------------------------------------------------===//
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "D$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
(instregex "DSG(F)?$")>;
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "DL(G)?$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
// Shifts
@@ -490,7 +500,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -597,9 +607,9 @@ def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone],
+def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
-def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, GroupAlone],
+def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, GroupAlone3],
(instregex "CDSG$")>;
// Compare and swap and store
@@ -620,7 +630,7 @@ def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
-def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone],
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
(instregex "TRT$")>;
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
@@ -643,21 +653,21 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD],
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone], (instregex "CVDG$")>;
-def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
-def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone],
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
(instregex "(A|S|ZA)P$")>;
-def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone], (instregex "(M|D)P$")>;
-def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone], (instregex "SRP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "(M|D)P$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
@@ -674,7 +684,7 @@ def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
// Load/store access multiple (not modeled precisely)
def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
//===----------------------------------------------------------------------===//
// Program mask and addressing mode
@@ -704,7 +714,7 @@ def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
//===----------------------------------------------------------------------===//
// Transaction begin
-def : InstRW<[WLat9, LSU2, FXb5, GroupAlone], (instregex "TBEGIN(C)?$")>;
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
// Transaction end
def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
@@ -813,9 +823,9 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>;
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked],
@@ -899,7 +909,7 @@ def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
// Test Data Class
def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
-def : InstRW<[WLat10, LSU2, VecDF4, GroupAlone], (instregex "TCXB$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
//===----------------------------------------------------------------------===//
// FP: Floating-point control register instructions
@@ -941,7 +951,7 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>;
@@ -1054,9 +1064,9 @@ def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CD(F|G)TR(A)?$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)TR(A)?$")>;
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)TR$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)TR$")>;
// Convert to fixed / logical
def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked],
@@ -1068,19 +1078,19 @@ def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Convert from / to zoned
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
// Convert from / to packed
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
@@ -1129,7 +1139,7 @@ def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
-def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1137,7 +1147,7 @@ def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
@@ -1200,7 +1210,7 @@ def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], (instregex "VLM$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
-def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "VSTM$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>;
def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
//===----------------------------------------------------------------------===//
@@ -1414,7 +1424,7 @@ def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STCT(L|G)$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
@@ -1458,8 +1468,8 @@ def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
// System: Memory-move Instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone], (instregex "MVC(K|P|S)$")>;
-def : InstRW<[WLat1, FXa, LSU5, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
@@ -1491,8 +1501,8 @@ def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
-def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone], (instregex "STCKE$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index 515f968e5091..1962fdf3a1d1 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -48,6 +48,16 @@ def : WriteRes<GroupAlone, []> {
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -132,7 +142,7 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXa2, FXb2, GroupAlone],
+def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -483,13 +493,14 @@ def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "D$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
(instregex "DSG(F)?$")>;
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "DL(G)?$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2],
+ (instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
// Shifts
@@ -499,7 +510,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -606,10 +617,10 @@ def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone],
+def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3,
- GroupAlone], (instregex "CDSG$")>;
+ GroupAlone3], (instregex "CDSG$")>;
// Compare and swap and store
def : InstRW<[WLat30, MCD], (instregex "CSST$")>;
@@ -629,7 +640,7 @@ def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
-def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone],
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
(instregex "TRT$")>;
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
@@ -662,21 +673,21 @@ def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>;
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone], (instregex "CVDG$")>;
-def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
-def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone],
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
(instregex "(A|S|ZA)P$")>;
-def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone], (instregex "(M|D)P$")>;
-def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone], (instregex "SRP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "(M|D)P$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
@@ -693,7 +704,7 @@ def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
// Load/store access multiple (not modeled precisely)
def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
//===----------------------------------------------------------------------===//
// Program mask and addressing mode
@@ -723,7 +734,7 @@ def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
//===----------------------------------------------------------------------===//
// Transaction begin
-def : InstRW<[WLat9, LSU2, FXb5, GroupAlone], (instregex "TBEGIN(C)?$")>;
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
// Transaction end
def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
@@ -832,9 +843,9 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>;
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked],
@@ -918,7 +929,7 @@ def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
// Test Data Class
def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
-def : InstRW<[WLat10, LSU2, VecDF4, GroupAlone], (instregex "TCXB$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
//===----------------------------------------------------------------------===//
// FP: Floating-point control register instructions
@@ -960,7 +971,7 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>;
@@ -1071,9 +1082,9 @@ def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CD(F|G)TR(A)?$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)TR(A)?$")>;
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)TR$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)TR$")>;
// Convert to fixed / logical
def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked],
@@ -1085,19 +1096,19 @@ def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Convert from / to zoned
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
// Convert from / to packed
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
@@ -1146,7 +1157,7 @@ def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
-def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1154,7 +1165,7 @@ def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
@@ -1218,7 +1229,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
-def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "VSTM$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>;
def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>;
@@ -1469,7 +1480,7 @@ def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "VLIP$")>;
-def : InstRW<[WLat6, VecDFX, LSU, GroupAlone], (instregex "VPKZ$")>;
+def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>;
def : InstRW<[WLat1, VecDFX, FXb, LSU, Cracked], (instregex "VUPKZ$")>;
def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], (instregex "VCVB(G)?$")>;
def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], (instregex "VCVD(G)?$")>;
@@ -1489,7 +1500,7 @@ def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>;
-def : InstRW<[WLat20, GroupAlone], (instregex "LPSW(E)?$")>;
+def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>;
def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>;
def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>;
def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>;
@@ -1502,7 +1513,7 @@ def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STCT(L|G)$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
@@ -1547,8 +1558,8 @@ def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
// System: Memory-move Instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone], (instregex "MVC(K|P|S)$")>;
-def : InstRW<[WLat1, FXa, LSU5, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
@@ -1580,8 +1591,8 @@ def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
-def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone], (instregex "STCKE$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index 3012b565d5ef..7535739f813a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -39,15 +39,21 @@ let NumMicroOps = 1 in {
def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
def : WriteRes<EndGroup, []> { let EndGroup = 1; }
}
-def : WriteRes<Cracked, []> {
- let NumMicroOps = 2;
- let BeginGroup = 1;
-}
def : WriteRes<GroupAlone, []> {
let NumMicroOps = 3;
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -114,7 +120,7 @@ def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?B(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BRCT(G|H)?$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXU3, LSU, GroupAlone],
+def : InstRW<[WLat1, FXU3, LSU, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -439,14 +445,14 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXU2, LSU, GroupAlone],
// Division and remainder
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "D$")>;
-def : InstRW<[WLat30, FPU4, FXU4, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU4, GroupAlone3], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone3],
(instregex "DSG(F)?$")>;
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DL(G)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DL(G)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
@@ -457,7 +463,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -560,7 +566,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
def : InstRW<[WLat12, WLat12, FXU6, LSU2, GroupAlone],
(instregex "CDSG$")>;
@@ -604,12 +610,12 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(KIMD|KLMD|KMAC|PCC)$")>
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
-def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone],
+def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone2],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone], (instregex "CVDG$")>;
-def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone3], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
def : InstRW<[WLat10, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
@@ -701,13 +707,13 @@ def : InstRW<[], (instregex "Insn.*")>;
// Load zero
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LZ(DR|ER)$")>;
-def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LZXR$")>;
+def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LZXR$")>;
// Load
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LER$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LD(R|R32|GR)$")>;
def : InstRW<[WLat3, FXU, NormalGr], (instregex "LGDR$")>;
-def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LXR$")>;
+def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LXR$")>;
// Load and Test
def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
@@ -747,10 +753,10 @@ def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CX(F|G)BR(A?)$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)BR(A?)$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CEL(F|G)BR$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CDL(F|G)BR$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -874,7 +880,7 @@ def : InstRW<[WLat9, FPU4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat10, FXU, FPU4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat10, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -986,11 +992,11 @@ def : InstRW<[WLat6, DFU4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDFTR$")>;
def : InstRW<[WLat30, FXU, DFU, GroupAlone], (instregex "CDGTR(A)?$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXFTR(A)?$")>;
-def : InstRW<[WLat30, FXU, DFU4, GroupAlone], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat30, FXU, DFU4, GroupAlone2], (instregex "CXGTR(A)?$")>;
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat9, FXU, DFU4, GroupAlone], (instregex "CXLFTR$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXLGTR$")>;
+def : InstRW<[WLat9, FXU, DFU4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXLGTR$")>;
// Convert to fixed / logical
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CFDTR(A)?$")>;
@@ -1002,9 +1008,9 @@ def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat8, FXU2, DFU4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat8, FXU2, DFU4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat12, FXU2, DFU4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat12, FXU2, DFU4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Perform floating-point operation
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>;
@@ -1051,7 +1057,7 @@ def : InstRW<[WLat10, WLat10, DFU4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "RRDTR$")>;
-def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat7LSU, LSU, DFU, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1059,7 +1065,7 @@ def : InstRW<[WLat11LSU, LSU, DFU4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "IEDTR$")>;
-def : InstRW<[WLat7, FXU, DFU4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat7, FXU, DFU4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 892f493570d1..a21d2c4cef70 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -39,15 +39,21 @@ let NumMicroOps = 1 in {
def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
def : WriteRes<EndGroup, []> { let EndGroup = 1; }
}
-def : WriteRes<Cracked, []> {
- let NumMicroOps = 2;
- let BeginGroup = 1;
-}
def : WriteRes<GroupAlone, []> {
let NumMicroOps = 3;
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -119,7 +125,7 @@ def : InstRW<[WLat1, LSU, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXU, EndGroup], (instregex "BRCT(G)?$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BRCTH$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXU3, LSU, GroupAlone],
+def : InstRW<[WLat1, FXU3, LSU, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -450,14 +456,14 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXU2, LSU, GroupAlone],
// Division and remainder
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "D$")>;
-def : InstRW<[WLat30, FPU4, FXU4, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU4, GroupAlone3], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone3],
(instregex "DSG(F)?$")>;
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DL(G)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DL(G)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
@@ -468,7 +474,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -572,7 +578,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
def : InstRW<[WLat12, WLat12, FXU6, LSU2, GroupAlone],
(instregex "CDSG$")>;
@@ -595,7 +601,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat1, LSU, GroupAlone], (instregex "TR$")>;
-def : InstRW<[WLat30, WLat30, WLat30, FXU3, LSU2, GroupAlone],
+def : InstRW<[WLat30, WLat30, WLat30, FXU3, LSU2, GroupAlone2],
(instregex "TRT$")>;
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
@@ -617,11 +623,11 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(KIMD|KLMD|KMAC|PCC)$")>
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone3], (instregex "CVDG$")>;
def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
@@ -785,10 +791,10 @@ def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CX(F|G)BR(A?)$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)BR(A?)$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CEL(F|G)BR$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CDL(F|G)BR$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -912,7 +918,7 @@ def : InstRW<[WLat9, FPU4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat10, FXU, FPU4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat10, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -1024,11 +1030,11 @@ def : InstRW<[WLat6, DFU4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDFTR$")>;
def : InstRW<[WLat30, FXU, DFU, GroupAlone], (instregex "CDGTR(A)?$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXFTR(A)?$")>;
-def : InstRW<[WLat30, FXU, DFU4, GroupAlone], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat30, FXU, DFU4, GroupAlone2], (instregex "CXGTR(A)?$")>;
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat9, FXU, DFU4, GroupAlone], (instregex "CXLFTR$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXLGTR$")>;
+def : InstRW<[WLat9, FXU, DFU4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXLGTR$")>;
// Convert to fixed / logical
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CFDTR(A)?$")>;
@@ -1040,13 +1046,13 @@ def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat8, FXU2, DFU4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat8, FXU2, DFU4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat12, FXU2, DFU4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat12, FXU2, DFU4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Convert from / to zoned
def : InstRW<[WLat4LSU, LSU, DFU2, GroupAlone], (instregex "CDZT$")>;
-def : InstRW<[WLat11LSU, LSU2, DFU4, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[WLat11LSU, LSU2, DFU4, GroupAlone3], (instregex "CXZT$")>;
def : InstRW<[WLat1, FXU, LSU, DFU2, GroupAlone], (instregex "CZDT$")>;
def : InstRW<[WLat1, FXU, LSU, DFU2, GroupAlone], (instregex "CZXT$")>;
@@ -1095,7 +1101,7 @@ def : InstRW<[WLat10, WLat10, DFU4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "RRDTR$")>;
-def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat7LSU, LSU, DFU, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1103,7 +1109,7 @@ def : InstRW<[WLat11LSU, LSU, DFU4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "IEDTR$")>;
-def : InstRW<[WLat7, FXU, DFU4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat7, FXU, DFU4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
@@ -1223,7 +1229,7 @@ def : InstRW<[WLat30, MCD], (instregex "SCKPF$")>;
def : InstRW<[WLat30, MCD], (instregex "SCKC$")>;
def : InstRW<[WLat30, MCD], (instregex "SPT$")>;
def : InstRW<[WLat9, FXU, LSU2, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[WLat20, LSU4, FXU2, GroupAlone], (instregex "STCKE$")>;
+def : InstRW<[WLat20, LSU4, FXU2, GroupAlone2], (instregex "STCKE$")>;
def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
def : InstRW<[WLat30, MCD], (instregex "STPT$")>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 9cd09b0f911e..fb030a207bc7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -19,6 +19,11 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "SystemZGenSubtargetInfo.inc"
+static cl::opt<bool> UseSubRegLiveness(
+ "systemz-subreg-liveness",
+ cl::desc("Enable subregister liveness tracking for SystemZ (experimental)"),
+ cl::Hidden);
+
// Pin the vtable to this file.
void SystemZSubtarget::anchor() {}
@@ -54,6 +59,11 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}
+
+bool SystemZSubtarget::enableSubRegLiveness() const {
+ return UseSubRegLiveness;
+}
+
bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
CodeModel::Model CM) const {
// PC32DBL accesses require the low bit to be clear. Note that a zero
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 8285b4277d11..cb6b21a1d465 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -102,6 +102,9 @@ public:
// Always enable the early if-conversion pass.
bool enableEarlyIfConversion() const override { return true; }
+ // Enable tracking of subregister liveness in register allocator.
+ bool enableSubRegLiveness() const override;
+
// Automatically generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
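Taken together, the SystemZSubtarget.cpp and SystemZSubtarget.h hunks above gate the register allocator's subregister-liveness tracking behind a hidden, off-by-default command-line flag. A minimal standalone sketch of that pattern, reusing the option name from the patch but with a simplified stand-in class (SketchSubtarget and the main() below are illustrative, not LLVM code):

#include "llvm/Support/CommandLine.h"

// Hidden, off-by-default flag; only -systemz-subreg-liveness=true enables it.
static llvm::cl::opt<bool> UseSubRegLiveness(
    "systemz-subreg-liveness",
    llvm::cl::desc("Enable subregister liveness tracking for SystemZ (experimental)"),
    llvm::cl::Hidden);

// Simplified stand-in for the subtarget: the register allocator queries this
// hook, so returning the flag keeps the previous behaviour unless requested.
struct SketchSubtarget {
  bool enableSubRegLiveness() const { return UseSubRegLiveness; }
};

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  SketchSubtarget ST;
  return ST.enableSubRegLiveness() ? 0 : 1;
}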
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f3620dcf3b92..9596a2b6388d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -128,10 +128,16 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
// in range of LARL. However, the JIT environment has no equivalent
// of copy relocs, so locally-binding data symbols might not be in
// the range of LARL. We need the Medium model in that case.
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- Reloc::Model RM, bool JIT) {
- if (CM)
+static CodeModel::Model
+getEffectiveSystemZCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM,
+ bool JIT) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
return *CM;
+ }
if (JIT)
return RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
return CodeModel::Small;
@@ -146,7 +152,8 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL),
+ getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
+ OL),
TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index c5cdc22f2099..129610fe095b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -328,6 +328,25 @@ bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
}
+// Return the bit size for the scalar type or vector element
+// type. getScalarSizeInBits() returns 0 for a pointer type.
+static unsigned getScalarSizeInBits(Type *Ty) {
+ unsigned Size =
+ (Ty->isPtrOrPtrVectorTy() ? 64U : Ty->getScalarSizeInBits());
+ assert(Size > 0 && "Element must have non-zero size.");
+ return Size;
+}
+
+// getNumberOfParts() calls getTypeLegalizationCost() which splits the vector
+// type until it is legal. This would e.g. return 4 for <6 x i64>, instead of
+// 3.
+static unsigned getNumVectorRegs(Type *Ty) {
+ assert(Ty->isVectorTy() && "Expected vector type");
+ unsigned WideBits = getScalarSizeInBits(Ty) * Ty->getVectorNumElements();
+ assert(WideBits > 0 && "Could not compute size of vector");
+ return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+}
+
int SystemZTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
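The comment in the hunk above already gives the key data point: getNumberOfParts() reports 4 legalized parts for <6 x i64>, while the new helper counts the 3 occupied 128-bit vector registers. A hypothetical standalone reimplementation (not the patched function itself) that checks the arithmetic:

#include <cassert>

// Count 128-bit SystemZ vector registers needed for NumElements elements of
// ScalarBits bits each, rounding up (mirrors the expression in the hunk above).
static unsigned numVectorRegs(unsigned ScalarBits, unsigned NumElements) {
  unsigned WideBits = ScalarBits * NumElements;
  return (WideBits % 128U) ? (WideBits / 128U + 1) : (WideBits / 128U);
}

int main() {
  assert(numVectorRegs(64, 6) == 3);  // <6 x i64>: 384 bits -> 3 registers
  assert(numVectorRegs(8, 16) == 1);  // <16 x i8>: exactly one register
  assert(numVectorRegs(32, 6) == 2);  // <6 x i32>: 192 bits -> 2 registers
  return 0;
}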
@@ -343,44 +362,59 @@ int SystemZTTIImpl::getArithmeticInstrCost(
unsigned ScalarBits = Ty->getScalarSizeInBits();
- // Div with a constant which is a power of 2 will be converted by
- // DAGCombiner to use shifts. With vector shift-element instructions, a
- // vector sdiv costs about as much as a scalar one.
- const unsigned SDivCostEstimate = 4;
- bool SDivPow2 = false;
- bool UDivPow2 = false;
- if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) &&
- Args.size() == 2) {
- const ConstantInt *CI = nullptr;
+ // There are three cases of division and remainder: Dividing with a register
+ // needs a divide instruction. A divisor which is a power of two constant
+ // can be implemented with a sequence of shifts. Any other constant needs a
+ // multiply and shifts.
+ const unsigned DivInstrCost = 20;
+ const unsigned DivMulSeqCost = 10;
+ const unsigned SDivPow2Cost = 4;
+
+ bool SignedDivRem =
+ Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
+ bool UnsignedDivRem =
+ Opcode == Instruction::UDiv || Opcode == Instruction::URem;
+
+ // Check for a constant divisor.
+ bool DivRemConst = false;
+ bool DivRemConstPow2 = false;
+ if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) {
if (const Constant *C = dyn_cast<Constant>(Args[1])) {
- if (C->getType()->isVectorTy())
- CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue());
+ const ConstantInt *CVal =
+ (C->getType()->isVectorTy()
+ ? dyn_cast_or_null<const ConstantInt>(C->getSplatValue())
+ : dyn_cast<const ConstantInt>(C));
+ if (CVal != nullptr &&
+ (CVal->getValue().isPowerOf2() || (-CVal->getValue()).isPowerOf2()))
+ DivRemConstPow2 = true;
else
- CI = dyn_cast<const ConstantInt>(C);
- }
- if (CI != nullptr &&
- (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) {
- if (Opcode == Instruction::SDiv)
- SDivPow2 = true;
- else
- UDivPow2 = true;
+ DivRemConst = true;
}
}
if (Ty->isVectorTy()) {
- assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
+ assert(ST->hasVector() &&
+ "getArithmeticInstrCost() called with vector type.");
unsigned VF = Ty->getVectorNumElements();
- unsigned NumVectors = getNumberOfParts(Ty);
+ unsigned NumVectors = getNumVectorRegs(Ty);
// These vector operations are custom handled, but are still supported
// with one instruction per vector, regardless of element size.
if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
- Opcode == Instruction::AShr || UDivPow2) {
+ Opcode == Instruction::AShr) {
return NumVectors;
}
- if (SDivPow2)
- return (NumVectors * SDivCostEstimate);
+ if (DivRemConstPow2)
+ return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
+ if (DivRemConst)
+ return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args);
+ if ((SignedDivRem || UnsignedDivRem) && VF > 4)
+ // Temporary hack: disable high vectorization factors with integer
+ // division/remainder, which will get scalarized and handled with
+ // GR128 registers. The mischeduler is not clever enough to avoid
+ // spilling yet.
+ return 1000;
// These FP operations are supported with a single vector instruction for
// double (base implementation assumes float generally costs 2). For
@@ -395,7 +429,8 @@ int SystemZTTIImpl::getArithmeticInstrCost(
return NumVectors;
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ unsigned ScalarCost =
+ getArithmeticInstrCost(Opcode, Ty->getScalarType());
unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
// FIXME: VF 2 for these FP operations are currently just as
// expensive as for VF 4.
@@ -432,30 +467,22 @@ int SystemZTTIImpl::getArithmeticInstrCost(
if (Opcode == Instruction::FRem)
return LIBCALL_COST;
- if (Opcode == Instruction::LShr || Opcode == Instruction::AShr)
- return (ScalarBits >= 32 ? 1 : 2 /*ext*/);
-
// Or requires one instruction, although it has custom handling for i64.
if (Opcode == Instruction::Or)
return 1;
- if (Opcode == Instruction::Xor && ScalarBits == 1)
- // 2 * ipm sequences ; xor ; shift ; compare
- return 7;
-
- if (UDivPow2)
- return 1;
- if (SDivPow2)
- return SDivCostEstimate;
-
- // An extra extension for narrow types is needed.
- if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
- // sext of op(s) for narrow types
- return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1));
+ if (Opcode == Instruction::Xor && ScalarBits == 1) {
+ if (ST->hasLoadStoreOnCond2())
+ return 5; // 2 * (li 0; loc 1); xor
+ return 7; // 2 * ipm sequences ; xor ; shift ; compare
+ }
- if (Opcode == Instruction::UDiv || Opcode == Instruction::URem)
- // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r
- return (ScalarBits < 32 ? 4 : 2);
+ if (DivRemConstPow2)
+ return (SignedDivRem ? SDivPow2Cost : 1);
+ if (DivRemConst)
+ return DivMulSeqCost;
+ if (SignedDivRem || UnsignedDivRem)
+ return DivInstrCost;
}
// Fallback to the default implementation.
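To make the scalar division/remainder costs above concrete, here is a small sketch, assuming only the three constants the patch introduces: a power-of-two constant divisor is priced as shifts, any other constant as a multiply-and-shift sequence, and a non-constant divisor as a real divide instruction. The enum and function names are illustrative, not part of the patch.

#include <cassert>

enum class Divisor { Register, ConstPow2, ConstOther };

// Pick the cost the patch assigns to a scalar SDiv/UDiv/SRem/URem.
static unsigned scalarDivRemCost(bool Signed, Divisor D) {
  const unsigned DivInstrCost = 20;  // real divide (e.g. dsgr / dlgr)
  const unsigned DivMulSeqCost = 10; // magic-number multiply plus shifts
  const unsigned SDivPow2Cost = 4;   // signed power-of-2: shift sequence
  if (D == Divisor::ConstPow2)
    return Signed ? SDivPow2Cost : 1; // unsigned power-of-2 is a single shift
  if (D == Divisor::ConstOther)
    return DivMulSeqCost;
  return DivInstrCost;
}

int main() {
  assert(scalarDivRemCost(false, Divisor::ConstPow2) == 1);
  assert(scalarDivRemCost(true, Divisor::ConstPow2) == 4);
  assert(scalarDivRemCost(true, Divisor::Register) == 20);
  return 0;
}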
@@ -463,12 +490,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo, Args);
}
-
int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
assert (Tp->isVectorTy());
assert (ST->hasVector() && "getShuffleCost() called.");
- unsigned NumVectors = getNumberOfParts(Tp);
+ unsigned NumVectors = getNumVectorRegs(Tp);
// TODO: Since fp32 is expanded, the shuffle cost should always be 0.
@@ -523,7 +549,7 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
// TODO: Since fp32 is expanded, the extract cost should always be 0.
- unsigned NumParts = getNumberOfParts(SrcTy);
+ unsigned NumParts = getNumVectorRegs(SrcTy);
if (NumParts <= 2)
// Up to 2 vector registers can be truncated efficiently with pack or
// permute. The latter requires an immediate mask to be loaded, which
@@ -566,7 +592,7 @@ getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
// The bitmask will be truncated.
PackCost = getVectorTruncCost(SrcTy, DstTy);
else if (SrcScalarBits < DstScalarBits) {
- unsigned DstNumParts = getNumberOfParts(DstTy);
+ unsigned DstNumParts = getNumVectorRegs(DstTy);
// Each vector select needs its part of the bitmask unpacked.
PackCost = Log2Diff * DstNumParts;
// Extra cost for moving part of mask before unpacking.
@@ -602,6 +628,25 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
return nullptr;
}
+// Get the cost of converting a boolean vector to a vector with same width
+// and element size as Dst, plus the cost of zero extending if needed.
+unsigned SystemZTTIImpl::
+getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
+ const Instruction *I) {
+ assert (Dst->isVectorTy());
+ unsigned VF = Dst->getVectorNumElements();
+ unsigned Cost = 0;
+ // If we know the widths of the compared operands, get any cost of
+ // converting it to match Dst. Otherwise assume same widths.
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP)
+ // One 'vn' per dst vector with an immediate mask.
+ Cost += getNumVectorRegs(Dst);
+ return Cost;
+}
+
int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I) {
unsigned DstScalarBits = Dst->getScalarSizeInBits();
@@ -611,8 +656,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
assert (Dst->isVectorTy());
unsigned VF = Src->getVectorNumElements();
- unsigned NumDstVectors = getNumberOfParts(Dst);
- unsigned NumSrcVectors = getNumberOfParts(Src);
+ unsigned NumDstVectors = getNumVectorRegs(Dst);
+ unsigned NumSrcVectors = getNumVectorRegs(Src);
if (Opcode == Instruction::Trunc) {
if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())
@@ -633,19 +678,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
}
- else if (SrcScalarBits == 1) {
- // This should be extension of a compare i1 result.
- // If we know what the widths of the compared operands, get the
- // cost of converting it to Dst. Otherwise assume same widths.
- unsigned Cost = 0;
- Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
- if (CmpOpTy != nullptr)
- Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
- if (Opcode == Instruction::ZExt)
- // One 'vn' per dst vector with an immediate mask.
- Cost += NumDstVectors;
- return Cost;
- }
+ else if (SrcScalarBits == 1)
+ return getBoolVecToIntConversionCost(Opcode, Dst, I);
}
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
@@ -654,8 +688,13 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// (seems to miss on differentiating on scalar/vector types).
// Only 64 bit vector conversions are natively supported.
- if (SrcScalarBits == 64 && DstScalarBits == 64)
- return NumDstVectors;
+ if (DstScalarBits == 64) {
+ if (SrcScalarBits == 64)
+ return NumDstVectors;
+
+ if (SrcScalarBits == 1)
+ return getBoolVecToIntConversionCost(Opcode, Dst, I) + NumDstVectors;
+ }
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. Base implementation does not
@@ -672,7 +711,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
(Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
NeedsExtracts = false;
- TotCost += getScalarizationOverhead(Dst, NeedsInserts, NeedsExtracts);
+ TotCost += getScalarizationOverhead(Src, false, NeedsExtracts);
+ TotCost += getScalarizationOverhead(Dst, NeedsInserts, false);
// FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
@@ -702,11 +742,18 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
else { // Scalar
assert (!Dst->isVectorTy());
- if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
- return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+ if (SrcScalarBits >= 32 ||
+ (I != nullptr && isa<LoadInst>(I->getOperand(0))))
+ return 1;
+ return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
+ }
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
Src->isIntegerTy(1)) {
+ if (ST->hasLoadStoreOnCond2())
+ return 2; // li 0; loc 1
+
// This should be extension of a compare i1 result, which is done with
// ipm and a varying sequence of instructions.
unsigned Cost = 0;
@@ -718,7 +765,6 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
// If operands of an fp-type was compared, this costs +1.
Cost++;
-
return Cost;
}
}
@@ -726,8 +772,20 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
}
-int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- const Instruction *I) {
+// Scalar i8 / i16 operations will typically be made after first extending
+// the operands to i32.
+static unsigned getOperandsExtensionCost(const Instruction *I) {
+ unsigned ExtCost = 0;
+ for (Value *Op : I->operands())
+ // A load of i8 or i16 sign/zero extends to i32.
+ if (!isa<LoadInst>(Op) && !isa<ConstantInt>(Op))
+ ExtCost++;
+
+ return ExtCost;
+}
+
+int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy, const Instruction *I) {
if (ValTy->isVectorTy()) {
assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
unsigned VF = ValTy->getVectorNumElements();
@@ -759,7 +817,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondT
// Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of
// floats. FIXME: <2 x float> generates same code as <4 x float>.
unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
- unsigned NumVecs_cmp = getNumberOfParts(ValTy);
+ unsigned NumVecs_cmp = getNumVectorRegs(ValTy);
unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
return Cost;
@@ -775,20 +833,30 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondT
PackCost =
getVectorBitmaskConversionCost(CmpOpTy, ValTy);
- return getNumberOfParts(ValTy) /*vsel*/ + PackCost;
+ return getNumVectorRegs(ValTy) /*vsel*/ + PackCost;
}
}
else { // Scalar
switch (Opcode) {
case Instruction::ICmp: {
+ // A loaded value compared with 0 with multiple users becomes Load and
+ // Test. The load is then not foldable, so return 0 cost for the ICmp.
+ unsigned ScalarBits = ValTy->getScalarSizeInBits();
+ if (I != nullptr && ScalarBits >= 32)
+ if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
+ C->getZExtValue() == 0)
+ return 0;
+
unsigned Cost = 1;
if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
- Cost += 2; // extend both operands
+ Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
return Cost;
}
case Instruction::Select:
if (ValTy->isFloatingPointTy())
- return 4; // No load on condition for FP, so this costs a conditional jump.
+ return 4; // No load on condition for FP - costs a conditional jump.
return 1; // Load On Condition.
}
}
@@ -804,7 +872,7 @@ getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return ((Index % 2 == 0) ? 1 : 0);
if (Opcode == Instruction::ExtractElement) {
- int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1);
+ int Cost = ((getScalarSizeInBits(Val) == 1) ? 2 /*+test-under-mask*/ : 1);
// Give a slight penalty for moving out of vector pipeline to FXU unit.
if (Index == 0 && Val->isIntOrIntVectorTy())
@@ -816,58 +884,147 @@ getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
+// Check if a load may be folded as a memory operand in its user.
+bool SystemZTTIImpl::
+isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) {
+ if (!Ld->hasOneUse())
+ return false;
+ FoldedValue = Ld;
+ const Instruction *UserI = cast<Instruction>(*Ld->user_begin());
+ unsigned LoadedBits = getScalarSizeInBits(Ld->getType());
+ unsigned TruncBits = 0;
+ unsigned SExtBits = 0;
+ unsigned ZExtBits = 0;
+ if (UserI->hasOneUse()) {
+ unsigned UserBits = UserI->getType()->getScalarSizeInBits();
+ if (isa<TruncInst>(UserI))
+ TruncBits = UserBits;
+ else if (isa<SExtInst>(UserI))
+ SExtBits = UserBits;
+ else if (isa<ZExtInst>(UserI))
+ ZExtBits = UserBits;
+ }
+ if (TruncBits || SExtBits || ZExtBits) {
+ FoldedValue = UserI;
+ UserI = cast<Instruction>(*UserI->user_begin());
+ // Load (single use) -> trunc/extend (single use) -> UserI
+ }
+ if ((UserI->getOpcode() == Instruction::Sub ||
+ UserI->getOpcode() == Instruction::SDiv ||
+ UserI->getOpcode() == Instruction::UDiv) &&
+ UserI->getOperand(1) != FoldedValue)
+ return false; // Not commutative, only RHS foldable.
+ // LoadOrTruncBits holds the number of effectively loaded bits, but 0 if an
+ // extension was made of the load.
+ unsigned LoadOrTruncBits =
+ ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits));
+ switch (UserI->getOpcode()) {
+ case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. ZE: 32->64
+ case Instruction::Sub:
+ case Instruction::ICmp:
+ if (LoadedBits == 32 && ZExtBits == 64)
+ return true;
+ LLVM_FALLTHROUGH;
+ case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64
+ if (UserI->getOpcode() != Instruction::ICmp) {
+ if (LoadedBits == 16 &&
+ (SExtBits == 32 ||
+ (SExtBits == 64 && ST->hasMiscellaneousExtensions2())))
+ return true;
+ if (LoadOrTruncBits == 16)
+ return true;
+ }
+ LLVM_FALLTHROUGH;
+ case Instruction::SDiv:// SE: 32->64
+ if (LoadedBits == 32 && SExtBits == 64)
+ return true;
+ LLVM_FALLTHROUGH;
+ case Instruction::UDiv:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // This also makes sense for float operations, but disabled for now due
+ // to regressions.
+ // case Instruction::FCmp:
+ // case Instruction::FAdd:
+ // case Instruction::FSub:
+ // case Instruction::FMul:
+ // case Instruction::FDiv:
+
+ // All possible extensions of memory checked above.
+
+ // Comparison between memory and immediate.
+ if (UserI->getOpcode() == Instruction::ICmp)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(UserI->getOperand(1)))
+ if (isUInt<16>(CI->getZExtValue()))
+ return true;
+ return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
+ break;
+ }
+ return false;
+}
+
+static bool isBswapIntrinsicCall(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ if (auto *CI = dyn_cast<CallInst>(I))
+ if (auto *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::bswap)
+ return true;
+ return false;
+}
+
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace,
const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type");
- if (!Src->isVectorTy() && Opcode == Instruction::Load &&
- I != nullptr && I->hasOneUse()) {
- const Instruction *UserI = cast<Instruction>(*I->user_begin());
- unsigned Bits = Src->getScalarSizeInBits();
- bool FoldsLoad = false;
- switch (UserI->getOpcode()) {
- case Instruction::ICmp:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- // This also makes sense for float operations, but disabled for now due
- // to regressions.
- // case Instruction::FCmp:
- // case Instruction::FAdd:
- // case Instruction::FSub:
- // case Instruction::FMul:
- // case Instruction::FDiv:
- FoldsLoad = (Bits == 32 || Bits == 64);
- break;
+ if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
+ // Store the load or its truncated or extended value in FoldedValue.
+ const Instruction *FoldedValue = nullptr;
+ if (isFoldableLoad(cast<LoadInst>(I), FoldedValue)) {
+ const Instruction *UserI = cast<Instruction>(*FoldedValue->user_begin());
+ assert (UserI->getNumOperands() == 2 && "Expected a binop.");
+
+ // UserI can't fold two loads, so in that case return 0 cost only
+ // half of the time.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (UserI->getOperand(i) == FoldedValue)
+ continue;
+
+ if (Instruction *OtherOp = dyn_cast<Instruction>(UserI->getOperand(i))){
+ LoadInst *OtherLoad = dyn_cast<LoadInst>(OtherOp);
+ if (!OtherLoad &&
+ (isa<TruncInst>(OtherOp) || isa<SExtInst>(OtherOp) ||
+ isa<ZExtInst>(OtherOp)))
+ OtherLoad = dyn_cast<LoadInst>(OtherOp->getOperand(0));
+ if (OtherLoad && isFoldableLoad(OtherLoad, FoldedValue/*dummy*/))
+ return i == 0; // Both operands foldable.
+ }
}
- if (FoldsLoad) {
- assert (UserI->getNumOperands() == 2 &&
- "Expected to only handle binops.");
-
- // UserI can't fold two loads, so in that case return 0 cost only
- // half of the time.
- for (unsigned i = 0; i < 2; ++i) {
- if (UserI->getOperand(i) == I)
- continue;
- if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
- if (LI->hasOneUse())
- return i == 0;
- }
- }
+ return 0; // Only I is foldable in user.
+ }
+ }
+ unsigned NumOps =
+ (Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
+
+ // Store/Load reversed saves one instruction.
+ if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) {
+ if (Opcode == Instruction::Load && I->hasOneUse()) {
+ const Instruction *LdUser = cast<Instruction>(*I->user_begin());
+ // In case of load -> bswap -> store, return normal cost for the load.
+ if (isBswapIntrinsicCall(LdUser) &&
+ (!LdUser->hasOneUse() || !isa<StoreInst>(*LdUser->user_begin())))
return 0;
- }
+ }
+ else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ const Value *StoredVal = SI->getValueOperand();
+ if (StoredVal->hasOneUse() && isBswapIntrinsicCall(StoredVal))
+ return 0;
+ }
}
- unsigned NumOps = getNumberOfParts(Src);
-
if (Src->getScalarSizeInBits() == 128)
// 128 bit scalars are held in a pair of two 64 bit registers.
NumOps *= 2;
@@ -875,34 +1032,94 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return NumOps;
}
+// The generic implementation of getInterleavedMemoryOpCost() is based on
+// adding costs of the memory operations plus all the extracts and inserts
+// needed for using / defining the vector operands. The SystemZ version does
+// roughly the same but bases the computations on vector permutations
+// instead.
int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
- unsigned WideBits = (VecTy->isPtrOrPtrVectorTy() ?
- (64U * VecTy->getVectorNumElements()) : VecTy->getPrimitiveSizeInBits());
- assert (WideBits > 0 && "Could not compute size of vector");
- int NumWideParts =
- ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+ // Return the ceiling of dividing A by B.
+ auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
+
+ unsigned NumElts = VecTy->getVectorNumElements();
+ assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
+ unsigned VF = NumElts / Factor;
+ unsigned NumEltsPerVecReg = (128U / getScalarSizeInBits(VecTy));
+ unsigned NumVectorMemOps = getNumVectorRegs(VecTy);
+ unsigned NumPermutes = 0;
+
+ if (Opcode == Instruction::Load) {
+ // Loading interleave groups may have gaps, which may mean fewer
+ // loads. Find out how many vectors will be loaded in total, and how
+ // many of them each value will be in.
+ BitVector UsedInsts(NumVectorMemOps, false);
+ std::vector<BitVector> ValueVecs(Factor, BitVector(NumVectorMemOps, false));
+ for (unsigned Index : Indices)
+ for (unsigned Elt = 0; Elt < VF; ++Elt) {
+ unsigned Vec = (Index + Elt * Factor) / NumEltsPerVecReg;
+ UsedInsts.set(Vec);
+ ValueVecs[Index].set(Vec);
+ }
+ NumVectorMemOps = UsedInsts.count();
+
+ for (unsigned Index : Indices) {
+ // Estimate that each loaded source vector containing this Index
+ // requires one operation, except that vperm can handle two input
+ // registers first time for each dst vector.
+ unsigned NumSrcVecs = ValueVecs[Index].count();
+ unsigned NumDstVecs = ceil(VF * getScalarSizeInBits(VecTy), 128U);
+ assert (NumSrcVecs >= NumDstVecs && "Expected at least as many sources");
+ NumPermutes += std::max(1U, NumSrcVecs - NumDstVecs);
+ }
+ } else {
+ // Estimate the permutes for each stored vector as the smaller of the
+ // number of elements and the number of source vectors. Subtract one per
+ // dst vector for vperm (S.A.).
+ unsigned NumSrcVecs = std::min(NumEltsPerVecReg, Factor);
+ unsigned NumDstVecs = NumVectorMemOps;
+ assert (NumSrcVecs > 1 && "Expected at least two source vectors.");
+ NumPermutes += (NumDstVecs * NumSrcVecs) - NumDstVecs;
+ }
- // How many source vectors are handled to produce a vectorized operand?
- int NumElsPerVector = (VecTy->getVectorNumElements() / NumWideParts);
- int NumSrcParts =
- ((NumWideParts > NumElsPerVector) ? NumElsPerVector : NumWideParts);
+ // Cost of load/store operations and the permutations needed.
+ return NumVectorMemOps + NumPermutes;
+}
- // A Load group may have gaps.
- unsigned NumOperands =
- ((Opcode == Instruction::Load) ? Indices.size() : Factor);
+static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
+ if (RetTy->isVectorTy() && ID == Intrinsic::bswap)
+ return getNumVectorRegs(RetTy); // VPERM
+ return -1;
+}
- // Each needed permute takes two vectors as input.
- if (NumSrcParts > 1)
- NumSrcParts--;
- int NumPermutes = NumSrcParts * NumOperands;
+int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args,
+ FastMathFlags FMF, unsigned VF) {
+ int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+ if (Cost != -1)
+ return Cost;
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+}
- // Cost of load/store operations and the permutations needed.
- return NumWideParts + NumPermutes;
+int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys,
+ FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
+ int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+ if (Cost != -1)
+ return Cost;
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys,
+ FMF, ScalarizationCostPassed);
}
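The permutation-based interleaved memory cost added above can be sanity-checked with a back-of-the-envelope model of the store path. The helper below is illustrative only; the 128-bit register width and the permute formula follow the patch, but the function itself is not LLVM code.

#include <algorithm>
#include <cassert>

// Store side of the new SystemZ interleaved cost: one permute per
// (dst vector, src vector) pair, minus one per dst vector because vperm
// can take two input registers the first time each dst vector is built.
static unsigned interleavedStoreCost(unsigned ScalarBits, unsigned NumElts,
                                     unsigned Factor) {
  unsigned NumEltsPerVecReg = 128U / ScalarBits;
  unsigned NumVectorMemOps = (ScalarBits * NumElts + 127U) / 128U;
  unsigned NumSrcVecs = std::min(NumEltsPerVecReg, Factor);
  unsigned NumDstVecs = NumVectorMemOps;
  unsigned NumPermutes = NumDstVecs * NumSrcVecs - NumDstVecs;
  return NumVectorMemOps + NumPermutes;
}

int main() {
  // Storing a factor-4 interleave group of 16 x i32:
  // 4 vector stores + 4*4-4 = 12 permutes -> cost 16.
  assert(interleavedStoreCost(32, 16, 4) == 16);
  return 0;
}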
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 4b11a6f0a837..e79bee1ea3a8 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -37,6 +37,8 @@ public:
/// \name Scalar TTI Implementations
/// @{
+ unsigned getInliningThresholdMultiplier() { return 3; }
+
int getIntImmCost(const APInt &Imm, Type *Ty);
int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
@@ -78,11 +80,14 @@ public:
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
+ unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
+ const Instruction *I);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
@@ -90,7 +95,16 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
+
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1);
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
/// @}
};
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 6bcf60fafc3e..bb937923b47e 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -45,6 +45,9 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
Mang = new Mangler();
InitMCObjectFileInfo(TM.getTargetTriple(), TM.isPositionIndependent(), *Ctx,
TM.getCodeModel() == CodeModel::Large);
+
+ // Reset various EH DWARF encodings.
+ PersonalityEncoding = LSDAEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
index 092f5ea4104b..39d5705b2a53 100644
--- a/contrib/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -40,12 +40,7 @@ TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
RequireStructuredCFG(false), DefaultOptions(Options), Options(Options) {
}
-TargetMachine::~TargetMachine() {
- delete AsmInfo;
- delete MRI;
- delete MII;
- delete STI;
-}
+TargetMachine::~TargetMachine() = default;
bool TargetMachine::isPositionIndependent() const {
return getRelocationModel() == Reloc::PIC_;
@@ -141,6 +136,15 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
if (GV && GV->hasDLLImportStorageClass())
return false;
+ // On MinGW, variables that haven't been declared with DLLImport may still
+ // end up automatically imported by the linker. To make this feasible,
+ // don't assume the variables to be DSO local unless we actually know
+ // that for sure. This only has to be done for variables; for functions
+ // the linker can insert thunks for calling functions from another DLL.
+ if (TT.isWindowsGNUEnvironment() && GV && GV->isDeclarationForLinker() &&
+ isa<GlobalVariable>(GV))
+ return false;
+
// Every other GV is local on COFF.
// Make an exception for windows OS in the triple: Some firmware builds use
// *-win32-macho triples. This (accidentally?) produced windows relocations
diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp
index 37d398d580f8..bae45ae28c45 100644
--- a/contrib/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm/lib/Target/TargetMachineC.cpp
@@ -115,6 +115,15 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
case LLVMRelocDynamicNoPic:
RM = Reloc::DynamicNoPIC;
break;
+ case LLVMRelocROPI:
+ RM = Reloc::ROPI;
+ break;
+ case LLVMRelocRWPI:
+ RM = Reloc::RWPI;
+ break;
+ case LLVMRelocROPI_RWPI:
+ RM = Reloc::ROPI_RWPI;
+ break;
default:
break;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 2d92b93ca704..0a5908f43790 100644
--- a/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -18,13 +18,14 @@
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
@@ -34,27 +35,10 @@ using namespace llvm;
namespace {
-// We store register types as SimpleValueType to retain SIMD layout
-// information, but must also be able to supply them as the (unnamed)
-// register enum from WebAssemblyRegisterInfo.td/.inc.
-static unsigned MVTToWasmReg(MVT::SimpleValueType Type) {
- switch(Type) {
- case MVT::i32: return WebAssembly::I32_0;
- case MVT::i64: return WebAssembly::I64_0;
- case MVT::f32: return WebAssembly::F32_0;
- case MVT::f64: return WebAssembly::F64_0;
- case MVT::v16i8: return WebAssembly::V128_0;
- case MVT::v8i16: return WebAssembly::V128_0;
- case MVT::v4i32: return WebAssembly::V128_0;
- case MVT::v4f32: return WebAssembly::V128_0;
- default: return MVT::INVALID_SIMPLE_VALUE_TYPE;
- }
-}
-
/// WebAssemblyOperand - Instances of this class represent the operands in a
/// parsed WASM machine instruction.
struct WebAssemblyOperand : public MCParsedAsmOperand {
- enum KindTy { Token, Local, Stack, Integer, Float, Symbol } Kind;
+ enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
SMLoc StartLoc, EndLoc;
@@ -62,19 +46,6 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
StringRef Tok;
};
- struct RegOp {
- // This is a (virtual) local or stack register represented as 0..
- unsigned RegNo;
- // In most targets, the register number also encodes the type, but for
- // wasm we have to track that seperately since we have an unbounded
- // number of registers.
- // This has the unfortunate side effect that we supply a different value
- // to the table-gen matcher at different times in the process (when it
- // calls getReg() or addRegOperands().
- // TODO: While this works, it feels brittle. and would be nice to clean up.
- MVT::SimpleValueType Type;
- };
-
struct IntOp {
int64_t Val;
};
@@ -87,37 +58,45 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
const MCExpr *Exp;
};
+ struct BrLOp {
+ std::vector<unsigned> List;
+ };
+
union {
struct TokOp Tok;
- struct RegOp Reg;
struct IntOp Int;
struct FltOp Flt;
struct SymOp Sym;
+ struct BrLOp BrL;
};
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
- : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
- WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, RegOp R)
- : Kind(K), StartLoc(Start), EndLoc(End), Reg(R) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
- : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
- : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
- : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
+ WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End)
+ : Kind(K), StartLoc(Start), EndLoc(End), BrL() {}
+
+ ~WebAssemblyOperand() {
+ if (isBrList())
+ BrL.~BrLOp();
+ }
bool isToken() const override { return Kind == Token; }
- bool isImm() const override { return Kind == Integer ||
- Kind == Float ||
- Kind == Symbol; }
- bool isReg() const override { return Kind == Local || Kind == Stack; }
+ bool isImm() const override {
+ return Kind == Integer || Kind == Float || Kind == Symbol;
+ }
bool isMem() const override { return false; }
+ bool isReg() const override { return false; }
+ bool isBrList() const { return Kind == BrList; }
unsigned getReg() const override {
- assert(isReg());
- // This is called from the tablegen matcher (MatchInstructionImpl)
- // where it expects to match the type of register, see RegOp above.
- return MVTToWasmReg(Reg.Type);
+ llvm_unreachable("Assembly inspects a register operand");
+ return 0;
}
StringRef getToken() const {
@@ -128,19 +107,9 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
SMLoc getStartLoc() const override { return StartLoc; }
SMLoc getEndLoc() const override { return EndLoc; }
- void addRegOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- assert(isReg() && "Not a register operand!");
- // This is called from the tablegen matcher (MatchInstructionImpl)
- // where it expects to output the actual register index, see RegOp above.
- unsigned R = Reg.RegNo;
- if (Kind == Stack) {
- // A stack register is represented as a large negative number.
- // See WebAssemblyRegNumbering::runOnMachineFunction and
- // getWARegStackId for why this | is needed.
- R |= INT32_MIN;
- }
- Inst.addOperand(MCOperand::createReg(R));
+ void addRegOperands(MCInst &, unsigned) const {
+ // Required by the assembly matcher.
+ llvm_unreachable("Assembly matcher creates register operands");
}
void addImmOperands(MCInst &Inst, unsigned N) const {
@@ -155,17 +124,17 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
llvm_unreachable("Should be immediate or symbol!");
}
+ void addBrListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && isBrList() && "Invalid BrList!");
+ for (auto Br : BrL.List)
+ Inst.addOperand(MCOperand::createImm(Br));
+ }
+
void print(raw_ostream &OS) const override {
switch (Kind) {
case Token:
OS << "Tok:" << Tok.Tok;
break;
- case Local:
- OS << "Loc:" << Reg.RegNo << ":" << static_cast<int>(Reg.Type);
- break;
- case Stack:
- OS << "Stk:" << Reg.RegNo << ":" << static_cast<int>(Reg.Type);
- break;
case Integer:
OS << "Int:" << Int.Val;
break;
@@ -175,6 +144,9 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
case Symbol:
OS << "Sym:" << Sym.Exp;
break;
+ case BrList:
+ OS << "BrList:" << BrL.List.size();
+ break;
}
}
};
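The new BrList member above is also why this hunk adds an explicit ~WebAssemblyOperand() destructor: once a union holds a member of non-trivial type, the enclosing class must destroy the active member by hand. A minimal sketch of that rule (the Operand and BrLOp types below are illustrative, not the parser's classes):

#include <utility>
#include <vector>

struct Operand {
  enum Kind { Integer, BrList } K;
  struct BrLOp { std::vector<unsigned> List; };
  union {
    long Int;
    BrLOp BrL; // non-trivial member: the union's special members are deleted
  };
  explicit Operand(long V) : K(Integer), Int(V) {}
  explicit Operand(std::vector<unsigned> L) : K(BrList), BrL{std::move(L)} {}
  ~Operand() {
    if (K == BrList)
      BrL.~BrLOp(); // destroy the active non-trivial member manually
  }
};

int main() {
  Operand A(42L);
  Operand B(std::vector<unsigned>{1, 2, 3});
  return static_cast<int>(B.BrL.List.size()) - 3; // 0
}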
@@ -182,352 +154,526 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
class WebAssemblyAsmParser final : public MCTargetAsmParser {
MCAsmParser &Parser;
MCAsmLexer &Lexer;
- // These are for the current function being parsed:
- // These are vectors since register assignments are so far non-sparse.
- // Replace by map if necessary.
- std::vector<MVT::SimpleValueType> LocalTypes;
- std::vector<MVT::SimpleValueType> StackTypes;
- MCSymbol *LastLabel;
+
+ // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
+ std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
+
+ // Order of labels, directives and instructions in a .s file has no
+ // syntactical enforcement. This class is a callback from the actual parser,
+ // and yet we have to be feeding data to the streamer in a very particular
+ // order to ensure a correct binary encoding that matches the regular backend
+ // (the streamer does not enforce this). This "state machine" enum helps
+ // guarantee that correct order.
+ enum ParserState {
+ FileStart,
+ Label,
+ FunctionStart,
+ FunctionLocals,
+ Instructions,
+ } CurrentState = FileStart;
+
+ // For ensuring blocks are properly nested.
+ enum NestingType {
+ Function,
+ Block,
+ Loop,
+ Try,
+ If,
+ Else,
+ Undefined,
+ };
+ std::vector<NestingType> NestingStack;
+
+ // We track this to see if a .functype following a label is the same,
+ // as this is how we recognize the start of a function.
+ MCSymbol *LastLabel = nullptr;
public:
- WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
- const MCInstrInfo &mii, const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, sti, mii), Parser(Parser),
- Lexer(Parser.getLexer()), LastLabel(nullptr) {
+ WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
+ Lexer(Parser.getLexer()) {
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
#define GET_ASSEMBLER_HEADER
#include "WebAssemblyGenAsmMatcher.inc"
// TODO: This is required to be implemented, but appears unused.
- bool ParseRegister(unsigned &/*RegNo*/, SMLoc &/*StartLoc*/,
- SMLoc &/*EndLoc*/) override {
+ bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/,
+ SMLoc & /*EndLoc*/) override {
llvm_unreachable("ParseRegister is not implemented.");
}
- bool Error(const StringRef &msg, const AsmToken &tok) {
- return Parser.Error(tok.getLoc(), msg + tok.getString());
+ bool error(const Twine &Msg, const AsmToken &Tok) {
+ return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
+ }
+
+ bool error(const Twine &Msg) {
+ return Parser.Error(Lexer.getTok().getLoc(), Msg);
+ }
+
+ void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
+ Signatures.push_back(std::move(Sig));
+ }
+
+ std::pair<StringRef, StringRef> nestingString(NestingType NT) {
+ switch (NT) {
+ case Function:
+ return {"function", "end_function"};
+ case Block:
+ return {"block", "end_block"};
+ case Loop:
+ return {"loop", "end_loop"};
+ case Try:
+ return {"try", "end_try"};
+ case If:
+ return {"if", "end_if"};
+ case Else:
+ return {"else", "end_if"};
+ default:
+ llvm_unreachable("unknown NestingType");
+ }
+ }
+
+ void push(NestingType NT) { NestingStack.push_back(NT); }
+
+ bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) {
+ if (NestingStack.empty())
+ return error(Twine("End of block construct with no start: ") + Ins);
+ auto Top = NestingStack.back();
+ if (Top != NT1 && Top != NT2)
+ return error(Twine("Block construct type mismatch, expected: ") +
+ nestingString(Top).second + ", instead got: " + Ins);
+ NestingStack.pop_back();
+ return false;
+ }
+
+ bool ensureEmptyNestingStack() {
+ auto err = !NestingStack.empty();
+ while (!NestingStack.empty()) {
+ error(Twine("Unmatched block construct(s) at function end: ") +
+ nestingString(NestingStack.back()).first);
+ NestingStack.pop_back();
+ }
+ return err;
}
- bool IsNext(AsmToken::TokenKind Kind) {
- auto ok = Lexer.is(Kind);
- if (ok) Parser.Lex();
- return ok;
+ bool isNext(AsmToken::TokenKind Kind) {
+ auto Ok = Lexer.is(Kind);
+ if (Ok)
+ Parser.Lex();
+ return Ok;
}
- bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
- if (!IsNext(Kind))
- return Error(std::string("Expected ") + KindName + ", instead got: ",
+ bool expect(AsmToken::TokenKind Kind, const char *KindName) {
+ if (!isNext(Kind))
+ return error(std::string("Expected ") + KindName + ", instead got: ",
Lexer.getTok());
return false;
}
- MVT::SimpleValueType ParseRegType(const StringRef &RegType) {
- // Derive type from .param .local decls, or the instruction itself.
- return StringSwitch<MVT::SimpleValueType>(RegType)
- .Case("i32", MVT::i32)
- .Case("i64", MVT::i64)
- .Case("f32", MVT::f32)
- .Case("f64", MVT::f64)
- .Case("i8x16", MVT::v16i8)
- .Case("i16x8", MVT::v8i16)
- .Case("i32x4", MVT::v4i32)
- .Case("f32x4", MVT::v4f32)
- .Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
+ StringRef expectIdent() {
+ if (!Lexer.is(AsmToken::Identifier)) {
+ error("Expected identifier, got: ", Lexer.getTok());
+ return StringRef();
+ }
+ auto Name = Lexer.getTok().getString();
+ Parser.Lex();
+ return Name;
}
- MVT::SimpleValueType &GetType(
- std::vector<MVT::SimpleValueType> &Types, size_t i) {
- Types.resize(std::max(i + 1, Types.size()), MVT::INVALID_SIMPLE_VALUE_TYPE);
- return Types[i];
+ Optional<wasm::ValType> parseType(const StringRef &Type) {
+ // FIXME: can't use StringSwitch because wasm::ValType doesn't have a
+ // "invalid" value.
+ if (Type == "i32")
+ return wasm::ValType::I32;
+ if (Type == "i64")
+ return wasm::ValType::I64;
+ if (Type == "f32")
+ return wasm::ValType::F32;
+ if (Type == "f64")
+ return wasm::ValType::F64;
+ if (Type == "v128" || Type == "i8x16" || Type == "i16x8" ||
+ Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
+ Type == "f64x2")
+ return wasm::ValType::V128;
+ return Optional<wasm::ValType>();
}
- bool ParseReg(OperandVector &Operands, StringRef TypePrefix) {
- if (Lexer.is(AsmToken::Integer)) {
- auto &Local = Lexer.getTok();
- // This is a reference to a local, turn it into a virtual register.
- auto LocalNo = static_cast<unsigned>(Local.getIntVal());
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Local, Local.getLoc(),
- Local.getEndLoc(),
- WebAssemblyOperand::RegOp{LocalNo,
- GetType(LocalTypes, LocalNo)}));
- Parser.Lex();
- } else if (Lexer.is(AsmToken::Identifier)) {
- auto &StackRegTok = Lexer.getTok();
- // These are push/pop/drop pseudo stack registers, which we turn
- // into virtual registers also. The stackify pass will later turn them
- // back into implicit stack references if possible.
- auto StackReg = StackRegTok.getString();
- auto StackOp = StackReg.take_while([](char c) { return isalpha(c); });
- auto Reg = StackReg.drop_front(StackOp.size());
- unsigned long long ParsedRegNo = 0;
- if (!Reg.empty() && getAsUnsignedInteger(Reg, 10, ParsedRegNo))
- return Error("Cannot parse stack register index: ", StackRegTok);
- unsigned RegNo = static_cast<unsigned>(ParsedRegNo);
- if (StackOp == "push") {
- // This defines a result, record register type.
- auto RegType = ParseRegType(TypePrefix);
- GetType(StackTypes, RegNo) = RegType;
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Stack,
- StackRegTok.getLoc(),
- StackRegTok.getEndLoc(),
- WebAssemblyOperand::RegOp{RegNo, RegType}));
- } else if (StackOp == "pop") {
- // This uses a previously defined stack value.
- auto RegType = GetType(StackTypes, RegNo);
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Stack,
- StackRegTok.getLoc(),
- StackRegTok.getEndLoc(),
- WebAssemblyOperand::RegOp{RegNo, RegType}));
- } else if (StackOp == "drop") {
- // This operand will be dropped, since it is part of an instruction
- // whose result is void.
- } else {
- return Error("Unknown stack register prefix: ", StackRegTok);
- }
+ WebAssembly::ExprType parseBlockType(StringRef ID) {
+ return StringSwitch<WebAssembly::ExprType>(ID)
+ .Case("i32", WebAssembly::ExprType::I32)
+ .Case("i64", WebAssembly::ExprType::I64)
+ .Case("f32", WebAssembly::ExprType::F32)
+ .Case("f64", WebAssembly::ExprType::F64)
+ .Case("v128", WebAssembly::ExprType::V128)
+ .Case("except_ref", WebAssembly::ExprType::ExceptRef)
+ .Case("void", WebAssembly::ExprType::Void)
+ .Default(WebAssembly::ExprType::Invalid);
+ }
+
+ bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
+ while (Lexer.is(AsmToken::Identifier)) {
+ auto Type = parseType(Lexer.getTok().getString());
+ if (!Type)
+ return true;
+ Types.push_back(Type.getValue());
Parser.Lex();
- } else {
- return Error(
- "Expected identifier/integer following $, instead got: ",
- Lexer.getTok());
+ if (!isNext(AsmToken::Comma))
+ break;
}
- IsNext(AsmToken::Equal);
return false;
}
- void ParseSingleInteger(bool IsNegative, OperandVector &Operands) {
+ void parseSingleInteger(bool IsNegative, OperandVector &Operands) {
auto &Int = Lexer.getTok();
int64_t Val = Int.getIntVal();
- if (IsNegative) Val = -Val;
+ if (IsNegative)
+ Val = -Val;
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, Int.getLoc(),
- Int.getEndLoc(), WebAssemblyOperand::IntOp{Val}));
+ WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
+ WebAssemblyOperand::IntOp{Val}));
Parser.Lex();
}
- bool ParseOperandStartingWithInteger(bool IsNegative,
- OperandVector &Operands,
- StringRef InstType) {
- ParseSingleInteger(IsNegative, Operands);
- if (Lexer.is(AsmToken::LParen)) {
- // Parse load/store operands of the form: offset($reg)align
- auto &LParen = Lexer.getTok();
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token,
- LParen.getLoc(),
- LParen.getEndLoc(),
- WebAssemblyOperand::TokOp{
- LParen.getString()}));
- Parser.Lex();
- if (Expect(AsmToken::Dollar, "register")) return true;
- if (ParseReg(Operands, InstType)) return true;
- auto &RParen = Lexer.getTok();
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token,
- RParen.getLoc(),
- RParen.getEndLoc(),
- WebAssemblyOperand::TokOp{
- RParen.getString()}));
- if (Expect(AsmToken::RParen, ")")) return true;
- if (Lexer.is(AsmToken::Integer)) {
- ParseSingleInteger(false, Operands);
+ bool parseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands,
+ StringRef InstName) {
+ parseSingleInteger(IsNegative, Operands);
+ // FIXME: there is probably a cleaner way to do this.
+ auto IsLoadStore = InstName.startswith("load") ||
+ InstName.startswith("store") ||
+ InstName.startswith("atomic_load") ||
+ InstName.startswith("atomic_store");
+ if (IsLoadStore) {
+ // Parse load/store operands of the form: offset align
+ auto &Offset = Lexer.getTok();
+ if (Offset.is(AsmToken::Integer)) {
+ parseSingleInteger(false, Operands);
} else {
// Alignment not specified.
// FIXME: correctly derive a default from the instruction.
+ // We can't just call WebAssembly::GetDefaultP2Align since we don't have
+ // an opcode until after the assembly matcher.
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, RParen.getLoc(),
- RParen.getEndLoc(), WebAssemblyOperand::IntOp{0}));
+ WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(),
+ WebAssemblyOperand::IntOp{0}));
}
}
return false;
}
- bool ParseInstruction(ParseInstructionInfo &/*Info*/, StringRef Name,
+ void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
+ WebAssembly::ExprType BT) {
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Integer, NameLoc, NameLoc,
+ WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)}));
+ }
+
+ bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override {
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token, NameLoc,
- SMLoc::getFromPointer(
- NameLoc.getPointer() + Name.size()),
- WebAssemblyOperand::TokOp{
- StringRef(NameLoc.getPointer(),
- Name.size())}));
+ // Note: Name does NOT point into the source code, but to a local, so
+ // use NameLoc instead.
+ Name = StringRef(NameLoc.getPointer(), Name.size());
+
+ // WebAssembly has instructions with / in them, which AsmLexer parses
+ // as separate tokens, so if we find such tokens immediately adjacent (no
+ // whitespace), expand the name to include them:
+ for (;;) {
+ auto &Sep = Lexer.getTok();
+ if (Sep.getLoc().getPointer() != Name.end() ||
+ Sep.getKind() != AsmToken::Slash)
+ break;
+ // Extend name with /
+ Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
+ Parser.Lex();
+ // We must now find another identifier, or error.
+ auto &Id = Lexer.getTok();
+ if (Id.getKind() != AsmToken::Identifier ||
+ Id.getLoc().getPointer() != Name.end())
+ return error("Incomplete instruction name: ", Id);
+ Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
+ Parser.Lex();
+ }
+
+ // Now construct the name as first operand.
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
+ WebAssemblyOperand::TokOp{Name}));
auto NamePair = Name.split('.');
// If no '.', there is no type prefix.
- if (NamePair.second.empty()) std::swap(NamePair.first, NamePair.second);
+ auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second;
+
+ // If this instruction is part of a control flow structure, ensure
+ // proper nesting.
+ bool ExpectBlockType = false;
+ if (BaseName == "block") {
+ push(Block);
+ ExpectBlockType = true;
+ } else if (BaseName == "loop") {
+ push(Loop);
+ ExpectBlockType = true;
+ } else if (BaseName == "try") {
+ push(Try);
+ ExpectBlockType = true;
+ } else if (BaseName == "if") {
+ push(If);
+ ExpectBlockType = true;
+ } else if (BaseName == "else") {
+ if (pop(BaseName, If))
+ return true;
+ push(Else);
+ } else if (BaseName == "catch") {
+ if (pop(BaseName, Try))
+ return true;
+ push(Try);
+ } else if (BaseName == "catch_all") {
+ if (pop(BaseName, Try))
+ return true;
+ push(Try);
+ } else if (BaseName == "end_if") {
+ if (pop(BaseName, If, Else))
+ return true;
+ } else if (BaseName == "end_try") {
+ if (pop(BaseName, Try))
+ return true;
+ } else if (BaseName == "end_loop") {
+ if (pop(BaseName, Loop))
+ return true;
+ } else if (BaseName == "end_block") {
+ if (pop(BaseName, Block))
+ return true;
+ } else if (BaseName == "end_function") {
+ if (pop(BaseName, Function) || ensureEmptyNestingStack())
+ return true;
+ }
+
while (Lexer.isNot(AsmToken::EndOfStatement)) {
auto &Tok = Lexer.getTok();
switch (Tok.getKind()) {
- case AsmToken::Dollar: {
- Parser.Lex();
- if (ParseReg(Operands, NamePair.first)) return true;
- break;
- }
case AsmToken::Identifier: {
auto &Id = Lexer.getTok();
- const MCExpr *Val;
- SMLoc End;
- if (Parser.parsePrimaryExpr(Val, End))
- return Error("Cannot parse symbol: ", Lexer.getTok());
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Symbol, Id.getLoc(),
- Id.getEndLoc(), WebAssemblyOperand::SymOp{Val}));
+ if (ExpectBlockType) {
+ // Assume this identifier is a block_type.
+ auto BT = parseBlockType(Id.getString());
+ if (BT == WebAssembly::ExprType::Invalid)
+ return error("Unknown block type: ", Id);
+ addBlockTypeOperand(Operands, NameLoc, BT);
+ Parser.Lex();
+ } else {
+ // Assume this identifier is a label.
+ const MCExpr *Val;
+ SMLoc End;
+ if (Parser.parsePrimaryExpr(Val, End))
+ return error("Cannot parse symbol: ", Lexer.getTok());
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
+ WebAssemblyOperand::SymOp{Val}));
+ }
break;
}
case AsmToken::Minus:
Parser.Lex();
if (Lexer.isNot(AsmToken::Integer))
- return Error("Expected integer instead got: ", Lexer.getTok());
- if (ParseOperandStartingWithInteger(true, Operands, NamePair.first))
+ return error("Expected integer instead got: ", Lexer.getTok());
+ if (parseOperandStartingWithInteger(true, Operands, BaseName))
return true;
break;
case AsmToken::Integer:
- if (ParseOperandStartingWithInteger(false, Operands, NamePair.first))
+ if (parseOperandStartingWithInteger(false, Operands, BaseName))
return true;
break;
case AsmToken::Real: {
double Val;
if (Tok.getString().getAsDouble(Val, false))
- return Error("Cannot parse real: ", Tok);
+ return error("Cannot parse real: ", Tok);
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Float, Tok.getLoc(),
- Tok.getEndLoc(), WebAssemblyOperand::FltOp{Val}));
+ WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(),
+ WebAssemblyOperand::FltOp{Val}));
+ Parser.Lex();
+ break;
+ }
+ case AsmToken::LCurly: {
Parser.Lex();
+ auto Op = make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc());
+ if (!Lexer.is(AsmToken::RCurly))
+ for (;;) {
+ Op->BrL.List.push_back(Lexer.getTok().getIntVal());
+ expect(AsmToken::Integer, "integer");
+ if (!isNext(AsmToken::Comma))
+ break;
+ }
+ expect(AsmToken::RCurly, "}");
+ Operands.push_back(std::move(Op));
break;
}
default:
- return Error("Unexpected token in operand: ", Tok);
+ return error("Unexpected token in operand: ", Tok);
}
if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Expect(AsmToken::Comma, ",")) return true;
- }
- }
- Parser.Lex();
- // Call instructions are vararg, but the tablegen matcher doesn't seem to
- // support that, so for now we strip these extra operands.
- // This is problematic if these arguments are not simple $pop stack
- // registers, since e.g. a local register would get lost, so we check for
- // this. This can be the case when using -disable-wasm-explicit-locals
- // which currently s2wasm requires.
- // TODO: Instead, we can move this code to MatchAndEmitInstruction below and
- // actually generate get_local instructions on the fly.
- // Or even better, improve the matcher to support vararg?
- auto IsIndirect = NamePair.second == "call_indirect";
- if (IsIndirect || NamePair.second == "call") {
- // Figure out number of fixed operands from the instruction.
- size_t CallOperands = 1; // The name token.
- if (!IsIndirect) CallOperands++; // The function index.
- if (!NamePair.first.empty()) CallOperands++; // The result register.
- if (Operands.size() > CallOperands) {
- // Ensure operands we drop are all $pop.
- for (size_t I = CallOperands; I < Operands.size(); I++) {
- auto Operand =
- reinterpret_cast<WebAssemblyOperand *>(Operands[I].get());
- if (Operand->Kind != WebAssemblyOperand::Stack)
- Parser.Error(NameLoc,
- "Call instruction has non-stack arguments, if this code was "
- "generated with -disable-wasm-explicit-locals please remove it");
- }
- // Drop unneeded operands.
- Operands.resize(CallOperands);
+ if (expect(AsmToken::Comma, ","))
+ return true;
}
}
- // Block instructions require a signature index, but these are missing in
- // assembly, so we add a dummy one explicitly (since we have no control
- // over signature tables here, we assume these will be regenerated when
- // the wasm module is generated).
- if (NamePair.second == "block" || NamePair.second == "loop") {
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, NameLoc,
- NameLoc, WebAssemblyOperand::IntOp{-1}));
- }
- // These don't specify the type, which has to derived from the local index.
- if (NamePair.second == "get_local" || NamePair.second == "tee_local") {
- if (Operands.size() >= 3 && Operands[1]->isReg() &&
- Operands[2]->isImm()) {
- auto Op1 = reinterpret_cast<WebAssemblyOperand *>(Operands[1].get());
- auto Op2 = reinterpret_cast<WebAssemblyOperand *>(Operands[2].get());
- auto Type = GetType(LocalTypes, static_cast<size_t>(Op2->Int.Val));
- Op1->Reg.Type = Type;
- GetType(StackTypes, Op1->Reg.RegNo) = Type;
- }
+ if (ExpectBlockType && Operands.size() == 1) {
+ // Support blocks with no operands as default to void.
+ addBlockTypeOperand(Operands, NameLoc, WebAssembly::ExprType::Void);
}
+ Parser.Lex();
return false;
}
void onLabelParsed(MCSymbol *Symbol) override {
LastLabel = Symbol;
+ CurrentState = Label;
}
+ bool parseSignature(wasm::WasmSignature *Signature) {
+ if (expect(AsmToken::LParen, "("))
+ return true;
+ if (parseRegTypeList(Signature->Params))
+ return true;
+ if (expect(AsmToken::RParen, ")"))
+ return true;
+ if (expect(AsmToken::MinusGreater, "->"))
+ return true;
+ if (expect(AsmToken::LParen, "("))
+ return true;
+ if (parseRegTypeList(Signature->Returns))
+ return true;
+ if (expect(AsmToken::RParen, ")"))
+ return true;
+ return false;
+ }
+
+ // This function processes wasm-specific directives streamed to
+ // WebAssemblyTargetStreamer, all others go to the generic parser
+ // (see WasmAsmParser).
bool ParseDirective(AsmToken DirectiveID) override {
+ // This function has a really weird return value behavior that is different
+ // from all the other parsing functions:
+ // - return true && no tokens consumed -> don't know this directive / let
+ // the generic parser handle it.
+ // - return true && tokens consumed -> a parsing error occurred.
+ // - return false -> processed this directive successfully.
assert(DirectiveID.getKind() == AsmToken::Identifier);
auto &Out = getStreamer();
- auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
- *Out.getTargetStreamer());
- // TODO: we're just parsing the subset of directives we're interested in,
- // and ignoring ones we don't recognise. We should ideally verify
- // all directives here.
- if (DirectiveID.getString() == ".type") {
- // This could be the start of a function, check if followed by
- // "label,@function"
- if (!(IsNext(AsmToken::Identifier) &&
- IsNext(AsmToken::Comma) &&
- IsNext(AsmToken::At) &&
- Lexer.is(AsmToken::Identifier)))
- return Error("Expected label,@type declaration, got: ", Lexer.getTok());
- if (Lexer.getTok().getString() == "function") {
- // Track locals from start of function.
- LocalTypes.clear();
- StackTypes.clear();
- }
- Parser.Lex();
- //Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction);
- } else if (DirectiveID.getString() == ".param" ||
- DirectiveID.getString() == ".local") {
- // Track the number of locals, needed for correct virtual register
- // assignment elsewhere.
- // Also output a directive to the streamer.
- std::vector<MVT> Params;
- std::vector<MVT> Locals;
- while (Lexer.is(AsmToken::Identifier)) {
- auto RegType = ParseRegType(Lexer.getTok().getString());
- if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE) return true;
- LocalTypes.push_back(RegType);
- if (DirectiveID.getString() == ".param") {
- Params.push_back(RegType);
- } else {
- Locals.push_back(RegType);
- }
- Parser.Lex();
- if (!IsNext(AsmToken::Comma)) break;
+ auto &TOut =
+ reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
+
+ // TODO: any time we return an error, at least one token must have been
+ // consumed, otherwise this will not signal an error to the caller.
+ if (DirectiveID.getString() == ".globaltype") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ if (expect(AsmToken::Comma, ","))
+ return true;
+ auto TypeTok = Lexer.getTok();
+ auto TypeName = expectIdent();
+ if (TypeName.empty())
+ return true;
+ auto Type = parseType(TypeName);
+ if (!Type)
+ return error("Unknown type in .globaltype directive: ", TypeTok);
+ // Now set this symbol with the correct type.
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ WasmSym->setGlobalType(
+ wasm::WasmGlobalType{uint8_t(Type.getValue()), true});
+ // And emit the directive again.
+ TOut.emitGlobalType(WasmSym);
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".functype") {
+ // This code has to send things to the streamer similar to
+ // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
+ // TODO: would be good to factor this into a common function, but the
+ // assembler and backend really don't share any common code, and this code
+ // parses the locals separately.
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ if (CurrentState == Label && WasmSym == LastLabel) {
+ // This .functype indicates a start of a function.
+ if (ensureEmptyNestingStack())
+ return true;
+ CurrentState = FunctionStart;
+ push(Function);
}
- assert(LastLabel);
- TOut.emitParam(LastLabel, Params);
+ auto Signature = make_unique<wasm::WasmSignature>();
+ if (parseSignature(Signature.get()))
+ return true;
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ TOut.emitFunctionType(WasmSym);
+ // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".eventtype") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ auto Signature = make_unique<wasm::WasmSignature>();
+ if (parseRegTypeList(Signature->Params))
+ return true;
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT);
+ TOut.emitEventType(WasmSym);
+ // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".local") {
+ if (CurrentState != FunctionStart)
+ return error(".local directive should follow the start of a function",
+ Lexer.getTok());
+ SmallVector<wasm::ValType, 4> Locals;
+ if (parseRegTypeList(Locals))
+ return true;
TOut.emitLocal(Locals);
- } else {
- // For now, ignore anydirective we don't recognize:
- while (Lexer.isNot(AsmToken::EndOfStatement)) Parser.Lex();
+ CurrentState = FunctionLocals;
+ return expect(AsmToken::EndOfStatement, "EOL");
}
- return Expect(AsmToken::EndOfStatement, "EOL");
+
+ return true; // We didn't process this directive.
}
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &/*Opcode*/,
- OperandVector &Operands,
- MCStreamer &Out, uint64_t &ErrorInfo,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override {
MCInst Inst;
unsigned MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
case Match_Success: {
+ if (CurrentState == FunctionStart) {
+ // This is the first instruction in a function, but we haven't seen
+ // a .local directive yet. The streamer requires locals to be encoded
+ // as a prelude to the instructions, so emit an empty list of locals
+ // here.
+ auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
+ *Out.getTargetStreamer());
+ TOut.emitLocal(SmallVector<wasm::ValType, 0>());
+ }
+ CurrentState = Instructions;
Out.EmitInstruction(Inst, getSTI());
return false;
}
case Match_MissingFeature:
- return Parser.Error(IDLoc,
- "instruction requires a WASM feature not currently enabled");
+ return Parser.Error(
+ IDLoc, "instruction requires a WASM feature not currently enabled");
case Match_MnemonicFail:
return Parser.Error(IDLoc, "invalid instruction");
case Match_NearMisses:
@@ -547,6 +693,8 @@ public:
}
llvm_unreachable("Implement any new match types added!");
}
+
+ void onEndOfFile() override { ensureEmptyNestingStack(); }
};
} // end anonymous namespace
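
The parser above validates control-flow nesting by pushing an entry for each block/loop/try/if and popping it at the matching else/catch/end_* mnemonic. A minimal standalone sketch of that discipline, using illustrative names (BlockKind, popExpecting) that are not part of the LLVM API:

#include <cstdio>
#include <vector>

enum class BlockKind { Function, Block, Loop, Try, If, Else };

// Pop the innermost construct if it matches K1 or K2; report an error
// otherwise. Returns true on error, matching the parser's convention above.
static bool popExpecting(std::vector<BlockKind> &Stack, BlockKind K1,
                         BlockKind K2) {
  if (Stack.empty() || (Stack.back() != K1 && Stack.back() != K2)) {
    std::fprintf(stderr, "block construct type mismatch\n");
    return true;
  }
  Stack.pop_back();
  return false;
}

int main() {
  std::vector<BlockKind> Stack;
  Stack.push_back(BlockKind::Function);                          // .functype
  Stack.push_back(BlockKind::Block);                             // block
  Stack.push_back(BlockKind::If);                                // if
  popExpecting(Stack, BlockKind::If, BlockKind::If);             // else ...
  Stack.push_back(BlockKind::Else);                              // ... reopens
  popExpecting(Stack, BlockKind::If, BlockKind::Else);           // end_if
  popExpecting(Stack, BlockKind::Block, BlockKind::Block);       // end_block
  popExpecting(Stack, BlockKind::Function, BlockKind::Function); // end_function
  // Anything still on the stack here would be an unmatched construct.
  return Stack.empty() ? 0 : 1;
}
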
diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 2f0960271e30..6acc9b20eed2 100644
--- a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -16,7 +16,6 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -37,6 +36,8 @@ using DecodeStatus = MCDisassembler::DecodeStatus;
#include "WebAssemblyGenDisassemblerTables.inc"
namespace {
+static constexpr int WebAssemblyInstructionTableSize = 256;
+
class WebAssemblyDisassembler final : public MCDisassembler {
std::unique_ptr<const MCInstrInfo> MCII;
@@ -75,31 +76,43 @@ static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
return V;
}
-static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, bool Signed) {
+static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
+ bool Signed = false) {
unsigned N = 0;
const char *Error = nullptr;
- auto Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
- Bytes.data() + Bytes.size(), &Error)
- : static_cast<int64_t>(
- decodeULEB128(Bytes.data() + Size, &N,
- Bytes.data() + Bytes.size(), &Error));
+ Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
+ Bytes.data() + Bytes.size(), &Error)
+ : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
+ Bytes.data() + Bytes.size(),
+ &Error));
if (Error)
return false;
Size += N;
+ return true;
+}
+
+static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, bool Signed) {
+ int64_t Val;
+ if (!nextLEB(Val, Bytes, Size, Signed))
+ return false;
MI.addOperand(MCOperand::createImm(Val));
return true;
}
template <typename T>
-bool parseFPImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
+bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
if (Size + sizeof(T) > Bytes.size())
return false;
T Val;
memcpy(&Val, Bytes.data() + Size, sizeof(T));
support::endian::byte_swap<T, support::endianness::little>(Val);
Size += sizeof(T);
- MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
+ if (std::is_floating_point<T>::value) {
+ MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
+ } else {
+ MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
+ }
return true;
}
@@ -108,7 +121,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
raw_ostream & /*OS*/, raw_ostream &CS) const {
CommentStream = &CS;
Size = 0;
- auto Opc = nextByte(Bytes, Size);
+ int Opc = nextByte(Bytes, Size);
if (Opc < 0)
return MCDisassembler::Fail;
const auto *WasmInst = &InstructionTable0[Opc];
@@ -124,10 +137,12 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
}
if (!WasmInst)
return MCDisassembler::Fail;
- Opc = nextByte(Bytes, Size);
- if (Opc < 0)
+ int64_t PrefixedOpc;
+ if (!nextLEB(PrefixedOpc, Bytes, Size))
return MCDisassembler::Fail;
- WasmInst += Opc;
+ if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
+ return MCDisassembler::Fail;
+ WasmInst += PrefixedOpc;
}
if (WasmInst->ET == ET_Unused)
return MCDisassembler::Fail;
@@ -136,7 +151,8 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MI.setOpcode(WasmInst->Opcode);
// Parse any operands.
for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
- switch (WasmInst->Operands[OPI]) {
+ auto OT = OperandTable[WasmInst->OperandStart + OPI];
+ switch (OT) {
// ULEB operands:
case WebAssembly::OPERAND_BASIC_BLOCK:
case WebAssembly::OPERAND_LOCAL:
@@ -152,32 +168,68 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
}
// SLEB operands:
case WebAssembly::OPERAND_I32IMM:
- case WebAssembly::OPERAND_I64IMM:
- case WebAssembly::OPERAND_SIGNATURE: {
+ case WebAssembly::OPERAND_I64IMM: {
if (!parseLEBImmediate(MI, Size, Bytes, true))
return MCDisassembler::Fail;
break;
}
+ // block_type operands (uint8_t).
+ case WebAssembly::OPERAND_SIGNATURE: {
+ if (!parseImmediate<uint8_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
// FP operands.
case WebAssembly::OPERAND_F32IMM: {
- if (!parseFPImmediate<float>(MI, Size, Bytes))
+ if (!parseImmediate<float>(MI, Size, Bytes))
return MCDisassembler::Fail;
break;
}
case WebAssembly::OPERAND_F64IMM: {
- if (!parseFPImmediate<double>(MI, Size, Bytes))
+ if (!parseImmediate<double>(MI, Size, Bytes))
return MCDisassembler::Fail;
break;
}
- case MCOI::OPERAND_REGISTER: {
- // These are NOT actually in the instruction stream, but MC is going to
- // expect operands to be present for them!
- // FIXME: can MC re-generate register assignments or do we have to
- // do this? Since this function decodes a single instruction, we don't
- // have the proper context for tracking an operand stack here.
- MI.addOperand(MCOperand::createReg(0));
+ // Vector lane operands (not LEB encoded).
+ case WebAssembly::OPERAND_VEC_I8IMM: {
+ if (!parseImmediate<uint8_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_VEC_I16IMM: {
+ if (!parseImmediate<uint16_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_VEC_I32IMM: {
+ if (!parseImmediate<uint32_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_VEC_I64IMM: {
+ if (!parseImmediate<uint64_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_BRLIST: {
+ int64_t TargetTableLen;
+ if (!nextLEB(TargetTableLen, Bytes, Size, false))
+ return MCDisassembler::Fail;
+ for (int64_t I = 0; I < TargetTableLen; I++) {
+ if (!parseLEBImmediate(MI, Size, Bytes, false))
+ return MCDisassembler::Fail;
+ }
+ // Default case.
+ if (!parseLEBImmediate(MI, Size, Bytes, false))
+ return MCDisassembler::Fail;
break;
}
+ case MCOI::OPERAND_REGISTER:
+ // The tablegen header currently does not have any register operands since
+ // we use only the stack (_S) instructions.
+ // If you hit this, it probably means a bad instruction definition in
+ // tablegen.
+ llvm_unreachable("Register operand in WebAssemblyDisassembler");
default:
llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
}
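
The disassembler reads prefixed opcodes and most immediates as LEB128 via the nextLEB helper above. A self-contained sketch of unsigned and signed LEB128 decoding under the same scheme (a plain reimplementation for illustration, not the llvm::decodeULEB128/decodeSLEB128 entry points the patch actually calls):

#include <cstdint>
#include <cstdio>
#include <vector>

// Decode an unsigned LEB128 starting at Pos; advances Pos past the value.
// Returns false if the buffer ends before the value terminates.
static bool decodeULEB(const std::vector<uint8_t> &Bytes, size_t &Pos,
                       uint64_t &Val) {
  Val = 0;
  unsigned Shift = 0;
  while (Pos < Bytes.size()) {
    uint8_t Byte = Bytes[Pos++];
    Val |= uint64_t(Byte & 0x7f) << Shift;
    if (!(Byte & 0x80))
      return true;
    Shift += 7;
  }
  return false; // ran off the end of the buffer
}

// Signed variant: same scheme, plus sign extension from the last group.
static bool decodeSLEB(const std::vector<uint8_t> &Bytes, size_t &Pos,
                       int64_t &Val) {
  Val = 0;
  unsigned Shift = 0;
  uint8_t Byte = 0;
  do {
    if (Pos >= Bytes.size())
      return false;
    Byte = Bytes[Pos++];
    Val |= int64_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  if (Shift < 64 && (Byte & 0x40))
    Val |= -(int64_t(1) << Shift); // sign-extend
  return true;
}

int main() {
  std::vector<uint8_t> Buf = {0xE5, 0x8E, 0x26, // ULEB128 of 624485
                              0x7F};            // SLEB128 of -1
  size_t Pos = 0;
  uint64_t U;
  int64_t S;
  decodeULEB(Buf, Pos, U);
  decodeSLEB(Buf, Pos, S);
  std::printf("%llu %lld\n", (unsigned long long)U, (long long)S); // 624485 -1
  return 0;
}
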
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index 10fa798ac8d7..15532d7ff1a6 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -35,12 +35,12 @@ using namespace llvm;
WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI), ControlFlowCounter(0) {}
+ : MCInstPrinter(MAI, MII, MRI) {}
void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
assert(RegNo != WebAssemblyFunctionInfo::UnusedReg);
- // Note that there's an implicit get_local/set_local here!
+ // Note that there's an implicit local.get/local.set here!
OS << "$" << RegNo;
}
@@ -57,9 +57,9 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because
// we have an extra flags operand which is not currently printed, for
// compatiblity reasons.
- if (i != 0 &&
- (MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID ||
- i != Desc.getNumOperands()))
+ if (i != 0 && ((MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID &&
+ MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID_S) ||
+ i != Desc.getNumOperands()))
OS << ", ";
printOperand(MI, i, OS);
}
@@ -70,25 +70,76 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
if (CommentStream) {
// Observe any effects on the control flow stack, for use in annotating
// control flow label references.
- switch (MI->getOpcode()) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
default:
break;
- case WebAssembly::LOOP: {
+
+ case WebAssembly::LOOP:
+ case WebAssembly::LOOP_S:
printAnnotation(OS, "label" + utostr(ControlFlowCounter) + ':');
ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, true));
break;
- }
+
case WebAssembly::BLOCK:
+ case WebAssembly::BLOCK_S:
ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
break;
+
+ case WebAssembly::TRY:
+ case WebAssembly::TRY_S:
+ ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
+ EHPadStack.push_back(EHPadStackCounter++);
+ LastSeenEHInst = TRY;
+ break;
+
case WebAssembly::END_LOOP:
- // Have to guard against an empty stack, in case of mismatched pairs
- // in assembly parsing.
- if (!ControlFlowStack.empty()) ControlFlowStack.pop_back();
+ case WebAssembly::END_LOOP_S:
+ if (ControlFlowStack.empty()) {
+ printAnnotation(OS, "End marker mismatch!");
+ } else {
+ ControlFlowStack.pop_back();
+ }
break;
+
case WebAssembly::END_BLOCK:
- if (!ControlFlowStack.empty()) printAnnotation(
- OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ case WebAssembly::END_BLOCK_S:
+ if (ControlFlowStack.empty()) {
+ printAnnotation(OS, "End marker mismatch!");
+ } else {
+ printAnnotation(
+ OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ }
+ break;
+
+ case WebAssembly::END_TRY:
+ case WebAssembly::END_TRY_S:
+ if (ControlFlowStack.empty()) {
+ printAnnotation(OS, "End marker mismatch!");
+ } else {
+ printAnnotation(
+ OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ LastSeenEHInst = END_TRY;
+ }
+ break;
+
+ case WebAssembly::CATCH_I32:
+ case WebAssembly::CATCH_I32_S:
+ case WebAssembly::CATCH_I64:
+ case WebAssembly::CATCH_I64_S:
+ case WebAssembly::CATCH_ALL:
+ case WebAssembly::CATCH_ALL_S:
+ // There can be multiple catch instructions for one try instruction, so we
+ // print the 'catch' label only for the first one.
+ if (LastSeenEHInst != CATCH) {
+ if (EHPadStack.empty()) {
+ printAnnotation(OS, "try-catch mismatch!");
+ } else {
+ printAnnotation(OS,
+ "catch" + utostr(EHPadStack.pop_back_val()) + ':');
+ }
+ }
+ LastSeenEHInst = CATCH;
break;
}
@@ -96,34 +147,61 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
unsigned NumFixedOperands = Desc.NumOperands;
SmallSet<uint64_t, 8> Printed;
for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- if (!(i < NumFixedOperands
- ? (Desc.OpInfo[i].OperandType ==
- WebAssembly::OPERAND_BASIC_BLOCK)
- : (Desc.TSFlags & WebAssemblyII::VariableOpImmediateIsLabel)))
- continue;
+ // See if this operand denotes a basic block target.
+ if (i < NumFixedOperands) {
+ // A non-variable_ops operand, check its type.
+ if (Desc.OpInfo[i].OperandType != WebAssembly::OPERAND_BASIC_BLOCK)
+ continue;
+ } else {
+ // A variable_ops operand, which currently can be immediates (used in
+ // br_table) which are basic block targets, or for call instructions
+ // when using -wasm-keep-registers (in which case they are registers,
+ // and should not be processed).
+ if (!MI->getOperand(i).isImm())
+ continue;
+ }
uint64_t Depth = MI->getOperand(i).getImm();
if (!Printed.insert(Depth).second)
continue;
- const auto &Pair = ControlFlowStack.rbegin()[Depth];
- printAnnotation(OS, utostr(Depth) + ": " + (Pair.second ? "up" : "down") +
- " to label" + utostr(Pair.first));
+
+ if (Opc == WebAssembly::RETHROW || Opc == WebAssembly::RETHROW_S) {
+ if (Depth > EHPadStack.size()) {
+ printAnnotation(OS, "Invalid depth argument!");
+ } else if (Depth == EHPadStack.size()) {
+ // This can happen when a rethrow instruction breaks out of all nests
+ // and throws up to the current function's caller.
+ printAnnotation(OS, utostr(Depth) + ": " + "to caller");
+ } else {
+ uint64_t CatchNo = EHPadStack.rbegin()[Depth];
+ printAnnotation(OS, utostr(Depth) + ": " + "down to catch" +
+ utostr(CatchNo));
+ }
+
+ } else {
+ if (Depth >= ControlFlowStack.size()) {
+ printAnnotation(OS, "Invalid depth argument!");
+ } else {
+ const auto &Pair = ControlFlowStack.rbegin()[Depth];
+ printAnnotation(OS, utostr(Depth) + ": " +
+ (Pair.second ? "up" : "down") + " to label" +
+ utostr(Pair.first));
+ }
+ }
}
}
}
static std::string toString(const APFloat &FP) {
// Print NaNs with custom payloads specially.
- if (FP.isNaN() &&
- !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) &&
+ if (FP.isNaN() && !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) &&
!FP.bitwiseIsEqual(
APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) {
APInt AI = FP.bitcastToAPInt();
- return
- std::string(AI.isNegative() ? "-" : "") + "nan:0x" +
- utohexstr(AI.getZExtValue() &
- (AI.getBitWidth() == 32 ? INT64_C(0x007fffff) :
- INT64_C(0x000fffffffffffff)),
- /*LowerCase=*/true);
+ return std::string(AI.isNegative() ? "-" : "") + "nan:0x" +
+ utohexstr(AI.getZExtValue() &
+ (AI.getBitWidth() == 32 ? INT64_C(0x007fffff)
+ : INT64_C(0x000fffffffffffff)),
+ /*LowerCase=*/true);
}
// Use C99's hexadecimal floating-point representation.
@@ -141,9 +219,6 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
- assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
- MII.get(MI->getOpcode()).TSFlags == 0) &&
- "WebAssembly variable_ops register ops don't use TSFlags");
unsigned WAReg = Op.getReg();
if (int(WAReg) >= 0)
printRegName(O, WAReg);
@@ -157,23 +232,9 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
O << '=';
} else if (Op.isImm()) {
- const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- assert((OpNo < Desc.getNumOperands() ||
- (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate)) &&
- "WebAssemblyII::VariableOpIsImmediate should be set for "
- "variable_ops immediate ops");
- (void)Desc;
- // TODO: (MII.get(MI->getOpcode()).TSFlags &
- // WebAssemblyII::VariableOpImmediateIsLabel)
- // can tell us whether this is an immediate referencing a label in the
- // control flow stack, and it may be nice to pretty-print.
O << Op.getImm();
} else if (Op.isFPImm()) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- assert(OpNo < Desc.getNumOperands() &&
- "Unexpected floating-point immediate as a non-fixed operand");
- assert(Desc.TSFlags == 0 &&
- "WebAssembly variable_ops floating point ops don't use TSFlags");
const MCOperandInfo &Info = Desc.OpInfo[OpNo];
if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
// TODO: MC converts all floating point immediate operands to double.
@@ -184,78 +245,66 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << ::toString(APFloat(Op.getFPImm()));
}
} else {
- assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
- (MII.get(MI->getOpcode()).TSFlags &
- WebAssemblyII::VariableOpIsImmediate)) &&
- "WebAssemblyII::VariableOpIsImmediate should be set for "
- "variable_ops expr ops");
assert(Op.isExpr() && "unknown operand kind in printOperand");
Op.getExpr()->print(O, &MAI);
}
}
-void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(
- const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+void WebAssemblyInstPrinter::printBrList(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << "{";
+ for (unsigned I = OpNo, E = MI->getNumOperands(); I != E; ++I) {
+ if (I != OpNo)
+ O << ", ";
+ O << MI->getOperand(I).getImm();
+ }
+ O << "}";
+}
+
+void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
int64_t Imm = MI->getOperand(OpNo).getImm();
if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode()))
return;
O << ":p2align=" << Imm;
}
-void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(
- const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- int64_t Imm = MI->getOperand(OpNo).getImm();
- switch (WebAssembly::ExprType(Imm)) {
- case WebAssembly::ExprType::Void: break;
- case WebAssembly::ExprType::I32: O << "i32"; break;
- case WebAssembly::ExprType::I64: O << "i64"; break;
- case WebAssembly::ExprType::F32: O << "f32"; break;
- case WebAssembly::ExprType::F64: O << "f64"; break;
- case WebAssembly::ExprType::I8x16: O << "i8x16"; break;
- case WebAssembly::ExprType::I16x8: O << "i16x8"; break;
- case WebAssembly::ExprType::I32x4: O << "i32x4"; break;
- case WebAssembly::ExprType::F32x4: O << "f32x4"; break;
- case WebAssembly::ExprType::B8x16: O << "b8x16"; break;
- case WebAssembly::ExprType::B16x8: O << "b16x8"; break;
- case WebAssembly::ExprType::B32x4: O << "b32x4"; break;
- case WebAssembly::ExprType::ExceptRef: O << "except_ref"; break;
- }
+void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ auto Imm = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+ if (Imm != wasm::WASM_TYPE_NORESULT)
+ O << WebAssembly::anyTypeToString(Imm);
}
-const char *llvm::WebAssembly::TypeToString(MVT Ty) {
- switch (Ty.SimpleTy) {
- case MVT::i32:
+// We have various enums representing a subset of these types, use this
+// function to convert any of them to text.
+const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) {
+ switch (Ty) {
+ case wasm::WASM_TYPE_I32:
return "i32";
- case MVT::i64:
+ case wasm::WASM_TYPE_I64:
return "i64";
- case MVT::f32:
+ case wasm::WASM_TYPE_F32:
return "f32";
- case MVT::f64:
+ case wasm::WASM_TYPE_F64:
return "f64";
- case MVT::v16i8:
- case MVT::v8i16:
- case MVT::v4i32:
- case MVT::v4f32:
+ case wasm::WASM_TYPE_V128:
return "v128";
- case MVT::ExceptRef:
+ case wasm::WASM_TYPE_FUNCREF:
+ return "funcref";
+ case wasm::WASM_TYPE_FUNC:
+ return "func";
+ case wasm::WASM_TYPE_EXCEPT_REF:
return "except_ref";
+ case wasm::WASM_TYPE_NORESULT:
+ return "void";
default:
- llvm_unreachable("unsupported type");
+ return "invalid_type";
}
}
-const char *llvm::WebAssembly::TypeToString(wasm::ValType Type) {
- switch (Type) {
- case wasm::ValType::I32:
- return "i32";
- case wasm::ValType::I64:
- return "i64";
- case wasm::ValType::F32:
- return "f32";
- case wasm::ValType::F64:
- return "f64";
- case wasm::ValType::EXCEPT_REF:
- return "except_ref";
- }
- llvm_unreachable("unsupported type");
+const char *llvm::WebAssembly::typeToString(wasm::ValType Ty) {
+ return anyTypeToString(static_cast<unsigned>(Ty));
}
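
Branch operands in wasm are relative depths, so the printer keeps a stack of open constructs and annotates each depth with the label it resolves to (loops are pushed as "up" targets, blocks and trys as "down" targets). A small sketch of that lookup, using the same (counter, is-loop) pairs as ControlFlowStack above:

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Each entry is (label number, branches-go-up?) just like the pairs kept
  // in ControlFlowStack above.
  std::vector<std::pair<uint64_t, bool>> Stack;
  uint64_t Counter = 0;

  Stack.push_back({Counter++, false}); // block -> label0 (branch to end)
  Stack.push_back({Counter++, true});  // loop  -> label1 (branch to start)

  // A "br 1" inside the loop: depth counts outward from the innermost
  // construct, so depth 1 targets the block (label0).
  uint64_t Depth = 1;
  if (Depth >= Stack.size()) {
    std::puts("Invalid depth argument!");
  } else {
    const auto &Pair = Stack.rbegin()[Depth];
    std::printf("%llu: %s to label%llu\n", (unsigned long long)Depth,
                Pair.second ? "up" : "down", (unsigned long long)Pair.first);
  }
  return 0; // prints "1: down to label0"
}
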
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index f5b890a7615e..5ad45c7d5c7f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -25,8 +25,13 @@ namespace llvm {
class MCSubtargetInfo;
class WebAssemblyInstPrinter final : public MCInstPrinter {
- uint64_t ControlFlowCounter;
- SmallVector<std::pair<uint64_t, bool>, 0> ControlFlowStack;
+ uint64_t ControlFlowCounter = 0;
+ uint64_t EHPadStackCounter = 0;
+ SmallVector<std::pair<uint64_t, bool>, 4> ControlFlowStack;
+ SmallVector<uint64_t, 4> EHPadStack;
+
+ enum EHInstKind { TRY, CATCH, END_TRY };
+ EHInstKind LastSeenEHInst = END_TRY;
public:
WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
@@ -38,6 +43,7 @@ public:
// Used by tblegen code.
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBrList(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O);
void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo,
@@ -50,8 +56,8 @@ public:
namespace WebAssembly {
-const char *TypeToString(MVT Ty);
-const char *TypeToString(wasm::ValType Type);
+const char *typeToString(wasm::ValType Ty);
+const char *anyTypeToString(unsigned Ty);
} // end namespace WebAssembly
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 244c2189b455..0726dd481174 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -73,13 +73,13 @@ public:
const MCFixupKindInfo &
WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[WebAssembly::NumTargetFixupKinds] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
- // WebAssemblyFixupKinds.h.
- //
- // Name Offset (bits) Size (bits) Flags
- { "fixup_code_sleb128_i32", 0, 5*8, 0 },
- { "fixup_code_sleb128_i64", 0, 10*8, 0 },
- { "fixup_code_uleb128_i32", 0, 5*8, 0 },
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // WebAssemblyFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ {"fixup_code_sleb128_i32", 0, 5 * 8, 0},
+ {"fixup_code_sleb128_i64", 0, 10 * 8, 0},
+ {"fixup_code_uleb128_i32", 0, 5 * 8, 0},
};
if (Kind < FirstTargetFixupKind)
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
index b0af63c924bd..c2fac5f93a2f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
@@ -15,11 +15,9 @@
namespace llvm {
namespace WebAssembly {
enum Fixups {
- fixup_code_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
- fixup_code_sleb128_i64, // 64-bit signed
- fixup_code_uleb128_i32, // 32-bit unsigned
-
- fixup_code_global_index, // 32-bit unsigned
+ fixup_code_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
+ fixup_code_sleb128_i64, // 64-bit signed
+ fixup_code_uleb128_i32, // 32-bit unsigned
// Marker
LastTargetFixupKind,
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 94ca94e1e18c..065a4dc94ca6 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -67,13 +67,16 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
OS << uint8_t(Binary);
} else {
assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet");
- OS << uint8_t(Binary >> 8)
- << uint8_t(Binary);
+ OS << uint8_t(Binary >> 8);
+ encodeULEB128(uint8_t(Binary), OS);
}
// For br_table instructions, encode the size of the table. In the MCInst,
- // there's an index operand, one operand for each table entry, and the
- // default operand.
+ // there's an index operand (if not a stack instruction), one operand for
+ // each table entry, and the default operand.
+ if (MI.getOpcode() == WebAssembly::BR_TABLE_I32_S ||
+ MI.getOpcode() == WebAssembly::BR_TABLE_I64_S)
+ encodeULEB128(MI.getNumOperands() - 1, OS);
if (MI.getOpcode() == WebAssembly::BR_TABLE_I32 ||
MI.getOpcode() == WebAssembly::BR_TABLE_I64)
encodeULEB128(MI.getNumOperands() - 2, OS);
@@ -83,36 +86,47 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
/* nothing to encode */
+
} else if (MO.isImm()) {
if (i < Desc.getNumOperands()) {
- assert(Desc.TSFlags == 0 &&
- "WebAssembly non-variable_ops don't use TSFlags");
const MCOperandInfo &Info = Desc.OpInfo[i];
LLVM_DEBUG(dbgs() << "Encoding immediate: type="
<< int(Info.OperandType) << "\n");
- if (Info.OperandType == WebAssembly::OPERAND_I32IMM) {
+ switch (Info.OperandType) {
+ case WebAssembly::OPERAND_I32IMM:
encodeSLEB128(int32_t(MO.getImm()), OS);
- } else if (Info.OperandType == WebAssembly::OPERAND_OFFSET32) {
+ break;
+ case WebAssembly::OPERAND_OFFSET32:
encodeULEB128(uint32_t(MO.getImm()), OS);
- } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
+ break;
+ case WebAssembly::OPERAND_I64IMM:
encodeSLEB128(int64_t(MO.getImm()), OS);
- } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) {
- llvm_unreachable("wasm globals should only be accessed symbolicly");
- } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) {
+ break;
+ case WebAssembly::OPERAND_SIGNATURE:
OS << uint8_t(MO.getImm());
- } else {
+ break;
+ case WebAssembly::OPERAND_VEC_I8IMM:
+ support::endian::write<uint8_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_VEC_I16IMM:
+ support::endian::write<uint16_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_VEC_I32IMM:
+ support::endian::write<uint32_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_VEC_I64IMM:
+ support::endian::write<uint64_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_GLOBAL:
+ llvm_unreachable("wasm globals should only be accessed symbolically");
+ default:
encodeULEB128(uint64_t(MO.getImm()), OS);
}
} else {
- assert(Desc.TSFlags == (WebAssemblyII::VariableOpIsImmediate |
- WebAssemblyII::VariableOpImmediateIsLabel));
encodeULEB128(uint64_t(MO.getImm()), OS);
}
+
} else if (MO.isFPImm()) {
- assert(i < Desc.getNumOperands() &&
- "Unexpected floating-point immediate as a non-fixed operand");
- assert(Desc.TSFlags == 0 &&
- "WebAssembly variable_ops floating point ops don't use TSFlags");
const MCOperandInfo &Info = Desc.OpInfo[i];
if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
// TODO: MC converts all floating point immediate operands to double.
@@ -124,27 +138,31 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
double d = MO.getFPImm();
support::endian::write<double>(OS, d, support::little);
}
+
} else if (MO.isExpr()) {
const MCOperandInfo &Info = Desc.OpInfo[i];
llvm::MCFixupKind FixupKind;
size_t PaddedSize = 5;
- if (Info.OperandType == WebAssembly::OPERAND_I32IMM) {
+ switch (Info.OperandType) {
+ case WebAssembly::OPERAND_I32IMM:
FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i32);
- } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
+ break;
+ case WebAssembly::OPERAND_I64IMM:
FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i64);
PaddedSize = 10;
- } else if (Info.OperandType == WebAssembly::OPERAND_FUNCTION32 ||
- Info.OperandType == WebAssembly::OPERAND_OFFSET32 ||
- Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
+ break;
+ case WebAssembly::OPERAND_FUNCTION32:
+ case WebAssembly::OPERAND_OFFSET32:
+ case WebAssembly::OPERAND_TYPEINDEX:
+ case WebAssembly::OPERAND_GLOBAL:
+ case WebAssembly::OPERAND_EVENT:
FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32);
- } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) {
- FixupKind = MCFixupKind(WebAssembly::fixup_code_global_index);
- } else {
+ break;
+ default:
llvm_unreachable("unexpected symbolic operand kind");
}
- Fixups.push_back(MCFixup::create(
- OS.tell() - Start, MO.getExpr(),
- FixupKind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(OS.tell() - Start, MO.getExpr(),
+ FixupKind, MI.getLoc()));
++MCNumFixups;
encodeULEB128(0, OS, PaddedSize);
} else {
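
Symbolic operands above are written as a fixed-width, zero-valued LEB128 field (5 bytes for 32-bit fixups, 10 for 64-bit) plus a fixup record, so the linker can later patch in any value of that width without moving code. A standalone sketch of the padded encoding, using a hypothetical encodeULEBPadded helper rather than LLVM's encodeULEB128 with its padding parameter:

#include <cstdint>
#include <cstdio>
#include <vector>

// Encode Val as ULEB128, padded with continuation bytes to exactly PadTo
// bytes so it can later be overwritten with any value of that width.
static void encodeULEBPadded(uint64_t Val, unsigned PadTo,
                             std::vector<uint8_t> &Out) {
  unsigned Count = 0;
  do {
    uint8_t Byte = Val & 0x7f;
    Val >>= 7;
    ++Count;
    if (Val != 0 || Count < PadTo)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Val != 0 || Count < PadTo);
}

int main() {
  std::vector<uint8_t> Code;
  // A call's function index is not known yet: emit 0 padded to 5 bytes and
  // record a fixup at this offset (the fixup bookkeeping is omitted here).
  encodeULEBPadded(0, 5, Code);
  for (uint8_t B : Code)
    std::printf("%02x ", B); // 80 80 80 80 00
  std::printf("\n");
  return 0;
}
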
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index baf8a0c96c0a..390f367c2978 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -90,6 +90,10 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
return new WebAssemblyTargetAsmStreamer(S, OS);
}
+static MCTargetStreamer *createNullTargetStreamer(MCStreamer &S) {
+ return new WebAssemblyTargetNullStreamer(S);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeWebAssemblyTargetMC() {
for (Target *T :
@@ -120,16 +124,31 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() {
createObjectTargetStreamer);
// Register the asm target streamer.
TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
+ // Register the null target streamer.
+ TargetRegistry::RegisterNullTargetStreamer(*T, createNullTargetStreamer);
}
}
wasm::ValType WebAssembly::toValType(const MVT &Ty) {
switch (Ty.SimpleTy) {
- case MVT::i32: return wasm::ValType::I32;
- case MVT::i64: return wasm::ValType::I64;
- case MVT::f32: return wasm::ValType::F32;
- case MVT::f64: return wasm::ValType::F64;
- case MVT::ExceptRef: return wasm::ValType::EXCEPT_REF;
- default: llvm_unreachable("unexpected type");
+ case MVT::i32:
+ return wasm::ValType::I32;
+ case MVT::i64:
+ return wasm::ValType::I64;
+ case MVT::f32:
+ return wasm::ValType::F32;
+ case MVT::f64:
+ return wasm::ValType::F64;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return wasm::ValType::V128;
+ case MVT::ExceptRef:
+ return wasm::ValType::EXCEPT_REF;
+ default:
+ llvm_unreachable("unexpected type");
}
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index c1c8d243e920..a01517fb90c3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -59,6 +59,14 @@ enum OperandType {
OPERAND_F32IMM,
/// 64-bit floating-point immediates.
OPERAND_F64IMM,
+ /// 8-bit vector lane immediate
+ OPERAND_VEC_I8IMM,
+ /// 16-bit vector lane immediate
+ OPERAND_VEC_I16IMM,
+ /// 32-bit vector lane immediate
+ OPERAND_VEC_I32IMM,
+ /// 64-bit vector lane immediate
+ OPERAND_VEC_I64IMM,
/// 32-bit unsigned function indices.
OPERAND_FUNCTION32,
/// 32-bit unsigned memory offsets.
@@ -69,17 +77,24 @@ enum OperandType {
OPERAND_SIGNATURE,
/// type signature immediate for call_indirect.
OPERAND_TYPEINDEX,
+ /// Event index.
+ OPERAND_EVENT,
+ /// A list of branch targets for br_list.
+ OPERAND_BRLIST,
};
} // end namespace WebAssembly
namespace WebAssemblyII {
-enum {
- // For variadic instructions, this flag indicates whether an operand
- // in the variable_ops range is an immediate value.
- VariableOpIsImmediate = (1 << 0),
- // For immediate values in the variable_ops range, this flag indicates
- // whether the value represents a control-flow label.
- VariableOpImmediateIsLabel = (1 << 1)
+
+/// Target Operand Flag enum.
+enum TOF {
+ MO_NO_FLAG = 0,
+
+ // Flags to indicate the type of the symbol being referenced
+ MO_SYMBOL_FUNCTION = 0x1,
+ MO_SYMBOL_GLOBAL = 0x2,
+ MO_SYMBOL_EVENT = 0x4,
+ MO_SYMBOL_MASK = 0x7,
};
} // end namespace WebAssemblyII
@@ -149,6 +164,10 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW8_U_XCHG_I32_S:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32_S:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64_S:
return 0;
case WebAssembly::LOAD16_S_I32:
case WebAssembly::LOAD16_S_I32_S:
@@ -194,6 +213,10 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW16_U_XCHG_I32_S:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32_S:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64_S:
return 1;
case WebAssembly::LOAD_I32:
case WebAssembly::LOAD_I32_S:
@@ -241,6 +264,14 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW_XCHG_I32_S:
case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW32_U_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I32_S:
+ case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64_S:
+ case WebAssembly::ATOMIC_NOTIFY:
+ case WebAssembly::ATOMIC_NOTIFY_S:
+ case WebAssembly::ATOMIC_WAIT_I32:
+ case WebAssembly::ATOMIC_WAIT_I32_S:
return 2;
case WebAssembly::LOAD_I64:
case WebAssembly::LOAD_I64_S:
@@ -266,7 +297,36 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW_XOR_I64_S:
case WebAssembly::ATOMIC_RMW_XCHG_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I64_S:
+ case WebAssembly::ATOMIC_WAIT_I64:
+ case WebAssembly::ATOMIC_WAIT_I64_S:
return 3;
+ case WebAssembly::LOAD_v16i8:
+ case WebAssembly::LOAD_v16i8_S:
+ case WebAssembly::LOAD_v8i16:
+ case WebAssembly::LOAD_v8i16_S:
+ case WebAssembly::LOAD_v4i32:
+ case WebAssembly::LOAD_v4i32_S:
+ case WebAssembly::LOAD_v2i64:
+ case WebAssembly::LOAD_v2i64_S:
+ case WebAssembly::LOAD_v4f32:
+ case WebAssembly::LOAD_v4f32_S:
+ case WebAssembly::LOAD_v2f64:
+ case WebAssembly::LOAD_v2f64_S:
+ case WebAssembly::STORE_v16i8:
+ case WebAssembly::STORE_v16i8_S:
+ case WebAssembly::STORE_v8i16:
+ case WebAssembly::STORE_v8i16_S:
+ case WebAssembly::STORE_v4i32:
+ case WebAssembly::STORE_v4i32_S:
+ case WebAssembly::STORE_v2i64:
+ case WebAssembly::STORE_v2i64_S:
+ case WebAssembly::STORE_v4f32:
+ case WebAssembly::STORE_v4f32_S:
+ case WebAssembly::STORE_v2f64:
+ case WebAssembly::STORE_v2f64_S:
+ return 4;
default:
llvm_unreachable("Only loads and stores have p2align values");
}
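
The defaults returned by GetDefaultP2Align above are simply log2 of the access width in bytes: 0 for the 8-bit forms, 1 for 16-bit, 2 for 32-bit accesses (including 32-bit waits and notify), 3 for 64-bit, and 4 for the new 128-bit SIMD loads and stores; the printer only emits ":p2align=" when an instruction's alignment differs from this default. A tiny illustrative check (the helper is an assumption for the example, not generated from the .td files):

#include <cassert>

// Default p2align is log2(access size in bytes).
static unsigned defaultP2Align(unsigned AccessBits) {
  unsigned P2 = 0;
  for (unsigned Bytes = AccessBits / 8; Bytes > 1; Bytes >>= 1)
    ++P2;
  return P2;
}

int main() {
  assert(defaultP2Align(8) == 0);   // e.g. ATOMIC_RMW8_U_CMPXCHG_*
  assert(defaultP2Align(16) == 1);  // e.g. ATOMIC_RMW16_U_CMPXCHG_*
  assert(defaultP2Align(32) == 2);  // e.g. LOAD_I32, ATOMIC_WAIT_I32
  assert(defaultP2Align(64) == 3);  // e.g. LOAD_I64, ATOMIC_WAIT_I64
  assert(defaultP2Align(128) == 4); // e.g. LOAD_v16i8 .. STORE_v2f64
  return 0;
}
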
@@ -282,19 +342,14 @@ static const unsigned StoreP2AlignOperandNo = 0;
/// This is used to indicate block signatures.
enum class ExprType : unsigned {
- Void = 0x40,
- I32 = 0x7F,
- I64 = 0x7E,
- F32 = 0x7D,
- F64 = 0x7C,
- I8x16 = 0x7B,
- I16x8 = 0x7A,
- I32x4 = 0x79,
- F32x4 = 0x78,
- B8x16 = 0x77,
- B16x8 = 0x76,
- B32x4 = 0x75,
- ExceptRef = 0x68
+ Void = 0x40,
+ I32 = 0x7F,
+ I64 = 0x7E,
+ F32 = 0x7D,
+ F64 = 0x7C,
+ V128 = 0x7B,
+ ExceptRef = 0x68,
+ Invalid = 0x00
};
/// Instruction opcodes emitted via means other than CodeGen.
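
ExprType above is the raw block-signature byte of the wasm binary format, which is why the SIMD cases collapse to a single V128 value and why the disassembler above now reads OPERAND_SIGNATURE as a single uint8_t rather than an SLEB. A short sketch mapping the mnemonic block types to those bytes (values copied from the enum; the helper name is illustrative):

#include <cassert>
#include <cstdint>
#include <string>

// Block signature byte used after block/loop/try/if (see ExprType above).
static uint8_t blockTypeByte(const std::string &Name) {
  if (Name == "void")       return 0x40;
  if (Name == "i32")        return 0x7F;
  if (Name == "i64")        return 0x7E;
  if (Name == "f32")        return 0x7D;
  if (Name == "f64")        return 0x7C;
  if (Name == "v128")       return 0x7B;
  if (Name == "except_ref") return 0x68;
  return 0x00; // Invalid
}

int main() {
  // "block i32" is encoded as the block opcode followed by the byte 0x7F;
  // a bare "block" defaults to void (0x40), as the parser above does.
  assert(blockTypeByte("i32") == 0x7F);
  assert(blockTypeByte("void") == 0x40);
  return 0;
}
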
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 5272e188e1d0..50143fb0ece3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -39,70 +39,80 @@ WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
WebAssemblyTargetWasmStreamer::WebAssemblyTargetWasmStreamer(MCStreamer &S)
: WebAssemblyTargetStreamer(S) {}
-static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) {
+static void printTypes(formatted_raw_ostream &OS,
+ ArrayRef<wasm::ValType> Types) {
bool First = true;
- for (MVT Type : Types) {
+ for (auto Type : Types) {
if (First)
First = false;
else
OS << ", ";
- OS << WebAssembly::TypeToString(Type);
+ OS << WebAssembly::typeToString(Type);
}
OS << '\n';
}
-void WebAssemblyTargetAsmStreamer::emitParam(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
+void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
if (!Types.empty()) {
- OS << "\t.param \t";
-
- // FIXME: Currently this applies to the "current" function; it may
- // be cleaner to specify an explicit symbol as part of the directive.
-
- PrintTypes(OS, Types);
+ OS << "\t.local \t";
+ printTypes(OS, Types);
}
}
-void WebAssemblyTargetAsmStreamer::emitResult(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
- if (!Types.empty()) {
- OS << "\t.result \t";
+void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
- // FIXME: Currently this applies to the "current" function; it may
- // be cleaner to specify an explicit symbol as part of the directive.
+void WebAssemblyTargetAsmStreamer::emitSignature(
+ const wasm::WasmSignature *Sig) {
+ OS << "(";
+ emitParamList(Sig);
+ OS << ") -> (";
+ emitReturnList(Sig);
+ OS << ")";
+}
- PrintTypes(OS, Types);
+void WebAssemblyTargetAsmStreamer::emitParamList(
+ const wasm::WasmSignature *Sig) {
+ auto &Params = Sig->Params;
+ for (auto &Ty : Params) {
+ if (&Ty != &Params[0])
+ OS << ", ";
+ OS << WebAssembly::typeToString(Ty);
}
}
-void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) {
- if (!Types.empty()) {
- OS << "\t.local \t";
- PrintTypes(OS, Types);
+void WebAssemblyTargetAsmStreamer::emitReturnList(
+ const wasm::WasmSignature *Sig) {
+ auto &Returns = Sig->Returns;
+ for (auto &Ty : Returns) {
+ if (&Ty != &Returns[0])
+ OS << ", ";
+ OS << WebAssembly::typeToString(Ty);
}
}
-void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
+void WebAssemblyTargetAsmStreamer::emitFunctionType(const MCSymbolWasm *Sym) {
+ assert(Sym->isFunction());
+ OS << "\t.functype\t" << Sym->getName() << " ";
+ emitSignature(Sym->getSignature());
+ OS << "\n";
+}
-void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType(
- MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
- OS << "\t.functype\t" << Symbol->getName();
- if (Results.empty())
- OS << ", void";
- else {
- assert(Results.size() == 1);
- OS << ", " << WebAssembly::TypeToString(Results.front());
- }
- for (auto Ty : Params)
- OS << ", " << WebAssembly::TypeToString(Ty);
- OS << '\n';
+void WebAssemblyTargetAsmStreamer::emitGlobalType(const MCSymbolWasm *Sym) {
+ assert(Sym->isGlobal());
+ OS << "\t.globaltype\t" << Sym->getName() << ", "
+ << WebAssembly::typeToString(
+ static_cast<wasm::ValType>(Sym->getGlobalType().Type))
+ << '\n';
}
-void WebAssemblyTargetAsmStreamer::emitGlobalImport(StringRef name) {
- OS << "\t.import_global\t" << name << '\n';
+void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) {
+ assert(Sym->isEvent());
+ OS << "\t.eventtype\t" << Sym->getName() << " ";
+ emitParamList(Sym->getSignature());
+ OS << "\n";
}
-void WebAssemblyTargetAsmStreamer::emitImportModule(MCSymbolWasm *Sym,
+void WebAssemblyTargetAsmStreamer::emitImportModule(const MCSymbolWasm *Sym,
StringRef ModuleName) {
OS << "\t.import_module\t" << Sym->getName() << ", " << ModuleName << '\n';
}
@@ -111,27 +121,9 @@ void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) {
OS << "\t.indidx \t" << *Value << '\n';
}
-void WebAssemblyTargetWasmStreamer::emitParam(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
- SmallVector<wasm::ValType, 4> Params;
- for (MVT Ty : Types)
- Params.push_back(WebAssembly::toValType(Ty));
-
- cast<MCSymbolWasm>(Symbol)->setParams(std::move(Params));
-}
-
-void WebAssemblyTargetWasmStreamer::emitResult(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
- SmallVector<wasm::ValType, 4> Returns;
- for (MVT Ty : Types)
- Returns.push_back(WebAssembly::toValType(Ty));
-
- cast<MCSymbolWasm>(Symbol)->setReturns(std::move(Returns));
-}
-
-void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<MVT> Types) {
- SmallVector<std::pair<MVT, uint32_t>, 4> Grouped;
- for (MVT Type : Types) {
+void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
+ SmallVector<std::pair<wasm::ValType, uint32_t>, 4> Grouped;
+ for (auto Type : Types) {
if (Grouped.empty() || Grouped.back().first != Type)
Grouped.push_back(std::make_pair(Type, 1));
else
@@ -141,7 +133,7 @@ void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<MVT> Types) {
Streamer.EmitULEB128IntValue(Grouped.size());
for (auto Pair : Grouped) {
Streamer.EmitULEB128IntValue(Pair.second);
- emitValueType(WebAssembly::toValType(Pair.first));
+ emitValueType(Pair.first);
}
}
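The Wasm streamer's emitLocal above run-length encodes consecutive locals of the same type before writing (count, type) pairs, mirroring how the binary format declares locals. Below is a self-contained sketch of the same grouping, with plain ints standing in for wasm::ValType; the helper name is made up for illustration.

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Collapse runs of identical local types into (count, type) pairs, as the
    // emitLocal implementation above does before emitting ULEB128 values.
    std::vector<std::pair<uint32_t, int>>
    groupLocals(const std::vector<int> &Types) {
      std::vector<std::pair<uint32_t, int>> Grouped;
      for (int Ty : Types) {
        if (Grouped.empty() || Grouped.back().second != Ty)
          Grouped.push_back({1u, Ty});
        else
          ++Grouped.back().first;
      }
      return Grouped; // e.g. {i32, i32, f64} -> {(2, i32), (1, f64)}
    }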
@@ -152,34 +144,3 @@ void WebAssemblyTargetWasmStreamer::emitEndFunc() {
void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) {
llvm_unreachable(".indidx encoding not yet implemented");
}
-
-void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType(
- MCSymbol *Symbol, SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) {
- MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Symbol);
- if (WasmSym->isFunction()) {
- // Symbol already has its arguments and result set.
- return;
- }
-
- SmallVector<wasm::ValType, 4> ValParams;
- for (MVT Ty : Params)
- ValParams.push_back(WebAssembly::toValType(Ty));
-
- SmallVector<wasm::ValType, 1> ValResults;
- for (MVT Ty : Results)
- ValResults.push_back(WebAssembly::toValType(Ty));
-
- WasmSym->setParams(std::move(ValParams));
- WasmSym->setReturns(std::move(ValResults));
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
-}
-
-void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) {
- llvm_unreachable(".global_import is not needed for direct wasm output");
-}
-
-void WebAssemblyTargetWasmStreamer::emitImportModule(MCSymbolWasm *Sym,
- StringRef ModuleName) {
- Sym->setModuleName(ModuleName);
-}
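In the text path, the new streamer prints every signature in a "(params) -> (returns)" shape, so a declaration printed by emitFunctionType comes out as a single .functype line, e.g. ".functype foo (i32, i32) -> (i32)" (the symbol name here is hypothetical). A self-contained sketch of that formatting, using plain strings instead of wasm::ValType; the helper name is made up for illustration.

    #include <string>
    #include <vector>

    // Mirror the "(params) -> (returns)" layout produced by emitSignature above.
    std::string formatSignature(const std::vector<std::string> &Params,
                                const std::vector<std::string> &Returns) {
      auto Join = [](const std::vector<std::string> &Tys) {
        std::string S;
        bool First = true;
        for (const std::string &Ty : Tys) {
          if (!First)
            S += ", ";
          First = false;
          S += Ty;
        }
        return S;
      };
      return "(" + Join(Params) + ") -> (" + Join(Returns) + ")";
    }
    // formatSignature({"i32", "i32"}, {"i32"}) yields "(i32, i32) -> (i32)".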
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index cafcb04ccd11..3073938118b4 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -31,24 +31,21 @@ class WebAssemblyTargetStreamer : public MCTargetStreamer {
public:
explicit WebAssemblyTargetStreamer(MCStreamer &S);
- /// .param
- virtual void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) = 0;
- /// .result
- virtual void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) = 0;
/// .local
- virtual void emitLocal(ArrayRef<MVT> Types) = 0;
+ virtual void emitLocal(ArrayRef<wasm::ValType> Types) = 0;
/// .endfunc
virtual void emitEndFunc() = 0;
/// .functype
- virtual void emitIndirectFunctionType(MCSymbol *Symbol,
- SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) = 0;
+ virtual void emitFunctionType(const MCSymbolWasm *Sym) = 0;
/// .indidx
virtual void emitIndIdx(const MCExpr *Value) = 0;
- /// .import_global
- virtual void emitGlobalImport(StringRef name) = 0;
+ /// .globaltype
+ virtual void emitGlobalType(const MCSymbolWasm *Sym) = 0;
+ /// .eventtype
+ virtual void emitEventType(const MCSymbolWasm *Sym) = 0;
/// .import_module
- virtual void emitImportModule(MCSymbolWasm *Sym, StringRef ModuleName) = 0;
+ virtual void emitImportModule(const MCSymbolWasm *Sym,
+ StringRef ModuleName) = 0;
protected:
void emitValueType(wasm::ValType Type);
@@ -57,20 +54,20 @@ protected:
/// This part is for ascii assembly output
class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer {
formatted_raw_ostream &OS;
+ void emitSignature(const wasm::WasmSignature *Sig);
+ void emitParamList(const wasm::WasmSignature *Sig);
+ void emitReturnList(const wasm::WasmSignature *Sig);
public:
WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
- void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitLocal(ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<wasm::ValType> Types) override;
void emitEndFunc() override;
- void emitIndirectFunctionType(MCSymbol *Symbol,
- SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) override;
+ void emitFunctionType(const MCSymbolWasm *Sym) override;
void emitIndIdx(const MCExpr *Value) override;
- void emitGlobalImport(StringRef name) override;
- void emitImportModule(MCSymbolWasm *Sym, StringRef ModuleName) override;
+ void emitGlobalType(const MCSymbolWasm *Sym) override;
+ void emitEventType(const MCSymbolWasm *Sym) override;
+ void emitImportModule(const MCSymbolWasm *Sym, StringRef ModuleName) override;
};
/// This part is for Wasm object output
@@ -78,16 +75,29 @@ class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer {
public:
explicit WebAssemblyTargetWasmStreamer(MCStreamer &S);
- void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitLocal(ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<wasm::ValType> Types) override;
void emitEndFunc() override;
- void emitIndirectFunctionType(MCSymbol *Symbol,
- SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) override;
+ void emitFunctionType(const MCSymbolWasm *Sym) override {}
void emitIndIdx(const MCExpr *Value) override;
- void emitGlobalImport(StringRef name) override;
- void emitImportModule(MCSymbolWasm *Sym, StringRef ModuleName) override;
+ void emitGlobalType(const MCSymbolWasm *Sym) override {}
+ void emitEventType(const MCSymbolWasm *Sym) override {}
+ void emitImportModule(const MCSymbolWasm *Sym,
+ StringRef ModuleName) override {}
+};
+
+/// This part is for null output
+class WebAssemblyTargetNullStreamer final : public WebAssemblyTargetStreamer {
+public:
+ explicit WebAssemblyTargetNullStreamer(MCStreamer &S)
+ : WebAssemblyTargetStreamer(S) {}
+
+ void emitLocal(ArrayRef<wasm::ValType>) override {}
+ void emitEndFunc() override {}
+ void emitFunctionType(const MCSymbolWasm *) override {}
+ void emitIndIdx(const MCExpr *) override {}
+ void emitGlobalType(const MCSymbolWasm *) override {}
+ void emitEventType(const MCSymbolWasm *) override {}
+ void emitImportModule(const MCSymbolWasm *, StringRef) override {}
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 4fb12d40b01b..763e30be8e02 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -81,16 +81,23 @@ static const MCSection *GetFixupSection(const MCExpr *Expr) {
return nullptr;
}
-unsigned
-WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
- const MCFixup &Fixup) const {
+static bool IsGlobalType(const MCValue &Target) {
+ const MCSymbolRefExpr *RefA = Target.getSymA();
+ return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_GLOBAL;
+}
+
+static bool IsEventType(const MCValue &Target) {
+ const MCSymbolRefExpr *RefA = Target.getSymA();
+ return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_EVENT;
+}
+
+unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
+ const MCFixup &Fixup) const {
// WebAssembly functions are not allocated in the data address space. To
// resolve a pointer to a function, we must use a special relocation type.
bool IsFunction = IsFunctionExpr(Fixup.getValue());
switch (unsigned(Fixup.getKind())) {
- case WebAssembly::fixup_code_global_index:
- return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB;
case WebAssembly::fixup_code_sleb128_i32:
if (IsFunction)
return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB;
@@ -98,10 +105,14 @@ WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
case WebAssembly::fixup_code_sleb128_i64:
llvm_unreachable("fixup_sleb128_i64 not implemented yet");
case WebAssembly::fixup_code_uleb128_i32:
+ if (IsGlobalType(Target))
+ return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB;
if (IsFunctionType(Target))
return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB;
if (IsFunction)
return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB;
+ if (IsEventType(Target))
+ return wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB;
return wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB;
case FK_Data_4:
if (IsFunction)
diff --git a/contrib/llvm/lib/Target/WebAssembly/README.txt b/contrib/llvm/lib/Target/WebAssembly/README.txt
index ef0099f07efb..a154b4bf7ea8 100644
--- a/contrib/llvm/lib/Target/WebAssembly/README.txt
+++ b/contrib/llvm/lib/Target/WebAssembly/README.txt
@@ -94,10 +94,10 @@ WebAssemblyTargetLowering.
//===---------------------------------------------------------------------===//
Instead of the OptimizeReturned pass, consider preserving the
-"returned" attribute through to MachineInstrs and extending the StoreResults
-pass to do this optimization on calls too. That would also let the
-WebAssemblyPeephole pass clean up dead defs for such calls, as it does for
-stores.
+"returned" attribute through to MachineInstrs and extending the
+MemIntrinsicResults pass to do this optimization on calls too. That would also
+let the WebAssemblyPeephole pass clean up dead defs for such calls, as it does
+for stores.
//===---------------------------------------------------------------------===//
@@ -120,8 +120,8 @@ code like this:
It could be done with a smaller encoding like this:
i32.const $push5=, 0
- tee_local $push6=, $4=, $pop5
- copy_local $3=, $pop6
+ local.tee $push6=, $4=, $pop5
+ local.copy $3=, $pop6
//===---------------------------------------------------------------------===//
@@ -180,11 +180,11 @@ floating-point constants.
//===---------------------------------------------------------------------===//
The function @dynamic_alloca_redzone in test/CodeGen/WebAssembly/userstack.ll
-ends up with a tee_local in its prolog which has an unused result, requiring
+ends up with a local.tee in its prolog which has an unused result, requiring
an extra drop:
- get_global $push8=, 0
- tee_local $push9=, 1, $pop8
+ global.get $push8=, 0
+ local.tee $push9=, 1, $pop8
drop $pop9
[...]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
index 05b7b21fb597..45145c0a6527 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -39,10 +39,11 @@ FunctionPass *createWebAssemblyArgumentMove();
FunctionPass *createWebAssemblySetP2AlignOperands();
// Late passes.
+FunctionPass *createWebAssemblyEHRestoreStackPointer();
FunctionPass *createWebAssemblyReplacePhysRegs();
FunctionPass *createWebAssemblyPrepareForLiveIntervals();
FunctionPass *createWebAssemblyOptimizeLiveIntervals();
-FunctionPass *createWebAssemblyStoreResults();
+FunctionPass *createWebAssemblyMemIntrinsicResults();
FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
FunctionPass *createWebAssemblyExplicitLocals();
@@ -63,10 +64,11 @@ void initializeFixFunctionBitcastsPass(PassRegistry &);
void initializeOptimizeReturnedPass(PassRegistry &);
void initializeWebAssemblyArgumentMovePass(PassRegistry &);
void initializeWebAssemblySetP2AlignOperandsPass(PassRegistry &);
+void initializeWebAssemblyEHRestoreStackPointerPass(PassRegistry &);
void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &);
void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &);
void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &);
-void initializeWebAssemblyStoreResultsPass(PassRegistry &);
+void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &);
void initializeWebAssemblyRegStackifyPass(PassRegistry &);
void initializeWebAssemblyRegColoringPass(PassRegistry &);
void initializeWebAssemblyExplicitLocalsPass(PassRegistry &);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
index 2f301da8e422..6b218f8aa880 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -23,8 +23,15 @@ include "llvm/Target/Target.td"
// WebAssembly Subtarget features.
//===----------------------------------------------------------------------===//
-def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "true",
+def FeatureSIMD128 : SubtargetFeature<"simd128", "SIMDLevel", "SIMD128",
"Enable 128-bit SIMD">;
+
+def FeatureUnimplementedSIMD128 :
+ SubtargetFeature<"unimplemented-simd128",
+ "SIMDLevel", "UnimplementedSIMD128",
+ "Enable 128-bit SIMD not yet implemented in engines",
+ [FeatureSIMD128]>;
+
def FeatureAtomics : SubtargetFeature<"atomics", "HasAtomics", "true",
"Enable Atomics">;
def FeatureNontrappingFPToInt :
@@ -71,7 +78,8 @@ def : ProcessorModel<"generic", NoSchedModel, []>;
// Latest and greatest experimental version of WebAssembly. Bugs included!
def : ProcessorModel<"bleeding-edge", NoSchedModel,
- [FeatureSIMD128, FeatureAtomics]>;
+ [FeatureSIMD128, FeatureAtomics,
+ FeatureNontrappingFPToInt, FeatureSignExt]>;
//===----------------------------------------------------------------------===//
// Target Declaration
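With this change the SIMD capability becomes an ordered level rather than a boolean, and because FeatureUnimplementedSIMD128 lists FeatureSIMD128 as an implied feature, enabling unimplemented-simd128 also enables simd128. Below is a minimal sketch of how such a level is typically queried from subtarget code; the enum and accessor names are assumptions for illustration, not the exact WebAssemblySubtarget definitions.

    // Hypothetical model of an ordered SIMD feature level.
    enum SIMDLevel { NoSIMD, SIMD128, UnimplementedSIMD128 };

    struct SubtargetSketch {
      SIMDLevel Level = NoSIMD; // set from the parsed subtarget feature bits
      bool hasSIMD128() const { return Level >= SIMD128; }
      bool hasUnimplementedSIMD128() const {
        return Level >= UnimplementedSIMD128;
      }
    };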
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
index 4af9cd150bf7..e49e2b67f435 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
@@ -24,10 +24,10 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-add-missing-prototypes"
@@ -60,16 +60,17 @@ ModulePass *llvm::createWebAssemblyAddMissingPrototypes() {
}
bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
- LLVM_DEBUG(dbgs() << "runnning AddMissingPrototypes\n");
+ LLVM_DEBUG(dbgs() << "********** Add Missing Prototypes **********\n");
- std::vector<std::pair<Function*, Function*>> Replacements;
+ std::vector<std::pair<Function *, Function *>> Replacements;
// Find all the prototype-less function declarations
for (Function &F : M) {
if (!F.isDeclaration() || !F.hasFnAttribute("no-prototype"))
continue;
- LLVM_DEBUG(dbgs() << "Found no-prototype function: " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Found no-prototype function: " << F.getName()
+ << "\n");
// When clang emits prototype-less C functions it uses (...), i.e. varargs
// functions that take no arguments (have no sentinel). When we see a
@@ -83,23 +84,29 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
"Functions with 'no-prototype' attribute should not have params: " +
F.getName());
-
// Create a function prototype based on the first call site (first bitcast)
// that we find.
FunctionType *NewType = nullptr;
- Function* NewF = nullptr;
+ Function *NewF = nullptr;
for (Use &U : F.uses()) {
LLVM_DEBUG(dbgs() << "prototype-less use: " << F.getName() << "\n");
- if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser())) {
- FunctionType *DestType =
- cast<FunctionType>(BC->getDestTy()->getPointerElementType());
-
- // Create a new function with the correct type
- NewType = DestType;
- NewF = Function::Create(NewType, F.getLinkage(), F.getName());
- NewF->setAttributes(F.getAttributes());
- NewF->removeFnAttr("no-prototype");
- break;
+ if (auto *BC = dyn_cast<BitCastOperator>(U.getUser())) {
+ if (auto *DestType = dyn_cast<FunctionType>(
+ BC->getDestTy()->getPointerElementType())) {
+ if (!NewType) {
+ // Create a new function with the correct type
+ NewType = DestType;
+ NewF = Function::Create(NewType, F.getLinkage(), F.getName());
+ NewF->setAttributes(F.getAttributes());
+ NewF->removeFnAttr("no-prototype");
+ } else {
+ if (NewType != DestType) {
+ report_fatal_error("Prototypeless function used with "
+ "conflicting signatures: " +
+ F.getName());
+ }
+ }
+ }
}
}
@@ -110,32 +117,42 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
continue;
}
- for (Use &U : F.uses()) {
- if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser())) {
- FunctionType *DestType =
- cast<FunctionType>(BC->getDestTy()->getPointerElementType());
- if (NewType != DestType) {
- report_fatal_error(
- "Prototypeless function used with conflicting signatures: " +
- F.getName());
- }
- BC->replaceAllUsesWith(NewF);
- Replacements.emplace_back(&F, NewF);
- } else {
- dbgs() << *U.getUser()->getType() << "\n";
+ SmallVector<Instruction *, 4> DeadInsts;
+
+ for (Use &US : F.uses()) {
+ User *U = US.getUser();
+ if (auto *BC = dyn_cast<BitCastOperator>(U)) {
+ if (auto *Inst = dyn_cast<BitCastInst>(U)) {
+ // Replace with a new bitcast
+ IRBuilder<> Builder(Inst);
+ Value *NewCast = Builder.CreatePointerCast(NewF, BC->getDestTy());
+ Inst->replaceAllUsesWith(NewCast);
+ DeadInsts.push_back(Inst);
+ } else if (auto *Const = dyn_cast<ConstantExpr>(U)) {
+ Constant *NewConst =
+ ConstantExpr::getPointerCast(NewF, BC->getDestTy());
+ Const->replaceAllUsesWith(NewConst);
+ } else {
+ dbgs() << *U->getType() << "\n";
#ifndef NDEBUG
- U.getUser()->dump();
+ U->dump();
#endif
- report_fatal_error(
- "unexpected use of prototypeless function: " + F.getName() + "\n");
+ report_fatal_error("unexpected use of prototypeless function: " +
+ F.getName() + "\n");
+ }
}
}
+
+ for (auto I : DeadInsts)
+ I->eraseFromParent();
+ Replacements.emplace_back(&F, NewF);
}
+
// Finally replace the old function declarations with the new ones
for (auto &Pair : Replacements) {
- Function* Old = Pair.first;
- Function* New = Pair.second;
+ Function *Old = Pair.first;
+ Function *New = Pair.second;
Old->eraseFromParent();
M.getFunctionList().push_back(New);
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 1f280e1d13fc..c4f03dfa7f9e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -50,7 +50,7 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
const TargetRegisterClass *TRC = MRI->getRegClass(RegNo);
for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16,
- MVT::v4i32, MVT::v4f32})
+ MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64})
if (TRI->isTypeLegalForClass(*TRC, T))
return T;
LLVM_DEBUG(errs() << "Unknown type for register number: " << RegNo);
@@ -78,24 +78,45 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() {
//===----------------------------------------------------------------------===//
void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ for (auto &It : OutContext.getSymbols()) {
+ // Emit a .globaltype and .eventtype declaration.
+ auto Sym = cast<MCSymbolWasm>(It.getValue());
+ if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_GLOBAL)
+ getTargetStreamer()->emitGlobalType(Sym);
+ else if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_EVENT)
+ getTargetStreamer()->emitEventType(Sym);
+ }
+
for (const auto &F : M) {
// Emit function type info for all undefined functions
if (F.isDeclarationForLinker() && !F.isIntrinsic()) {
SmallVector<MVT, 4> Results;
SmallVector<MVT, 4> Params;
- ComputeSignatureVTs(F, TM, Params, Results);
- MCSymbol *Sym = getSymbol(&F);
- getTargetStreamer()->emitIndirectFunctionType(Sym, Params, Results);
+ ComputeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
+ auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ if (!Sym->getSignature()) {
+ auto Signature = SignatureFromMVTs(Results, Params);
+ Sym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ }
+ // FIXME: this was originally intended for post-linking and was only used
+ // for imports that were only called indirectly (i.e. s2wasm could not
+ // infer the type from a call). With object files it applies to all
+      // imports, so fix the names and the tests, or rethink how import
+      // declarations work in asm files.
+ getTargetStreamer()->emitFunctionType(Sym);
if (TM.getTargetTriple().isOSBinFormatWasm() &&
F.hasFnAttribute("wasm-import-module")) {
- MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
- StringRef Name = F.getFnAttribute("wasm-import-module")
- .getValueAsString();
- getTargetStreamer()->emitImportModule(WasmSym, Name);
+ StringRef Name =
+ F.getFnAttribute("wasm-import-module").getValueAsString();
+ Sym->setModuleName(Name);
+ getTargetStreamer()->emitImportModule(Sym, Name);
}
}
}
+
for (const auto &G : M.globals()) {
if (!G.hasInitializer() && G.hasExternalLinkage()) {
if (G.getValueType()->isSized()) {
@@ -137,10 +158,18 @@ void WebAssemblyAsmPrinter::EmitJumpTableInfo() {
}
void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
- getTargetStreamer()->emitParam(CurrentFnSym, MFI->getParams());
-
- SmallVector<MVT, 4> ResultVTs;
const Function &F = MF->getFunction();
+ SmallVector<MVT, 1> ResultVTs;
+ SmallVector<MVT, 4> ParamVTs;
+ ComputeSignatureVTs(F.getFunctionType(), F, TM, ParamVTs, ResultVTs);
+ auto Signature = SignatureFromMVTs(ResultVTs, ParamVTs);
+ auto *WasmSym = cast<MCSymbolWasm>(CurrentFnSym);
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+
+ // FIXME: clean up how params and results are emitted (use signatures)
+ getTargetStreamer()->emitFunctionType(WasmSym);
// Emit the function index.
if (MDNode *Idx = F.getMetadata("wasm.index")) {
@@ -150,16 +179,9 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
cast<ConstantAsMetadata>(Idx->getOperand(0))->getValue()));
}
- ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs);
-
- // If the return type needs to be legalized it will get converted into
- // passing a pointer.
- if (ResultVTs.size() == 1)
- getTargetStreamer()->emitResult(CurrentFnSym, ResultVTs);
- else
- getTargetStreamer()->emitResult(CurrentFnSym, ArrayRef<MVT>());
-
- getTargetStreamer()->emitLocal(MFI->getLocals());
+ SmallVector<wasm::ValType, 16> Locals;
+ ValTypesFromMVTs(MFI->getLocals(), Locals);
+ getTargetStreamer()->emitLocal(Locals);
AsmPrinter::EmitFunctionBodyStart();
}
@@ -168,42 +190,63 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
LLVM_DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
switch (MI->getOpcode()) {
- case WebAssembly::ARGUMENT_I32:
- case WebAssembly::ARGUMENT_I64:
- case WebAssembly::ARGUMENT_F32:
- case WebAssembly::ARGUMENT_F64:
+ case WebAssembly::ARGUMENT_i32:
+ case WebAssembly::ARGUMENT_i32_S:
+ case WebAssembly::ARGUMENT_i64:
+ case WebAssembly::ARGUMENT_i64_S:
+ case WebAssembly::ARGUMENT_f32:
+ case WebAssembly::ARGUMENT_f32_S:
+ case WebAssembly::ARGUMENT_f64:
+ case WebAssembly::ARGUMENT_f64_S:
case WebAssembly::ARGUMENT_v16i8:
+ case WebAssembly::ARGUMENT_v16i8_S:
case WebAssembly::ARGUMENT_v8i16:
+ case WebAssembly::ARGUMENT_v8i16_S:
case WebAssembly::ARGUMENT_v4i32:
+ case WebAssembly::ARGUMENT_v4i32_S:
+ case WebAssembly::ARGUMENT_v2i64:
+ case WebAssembly::ARGUMENT_v2i64_S:
case WebAssembly::ARGUMENT_v4f32:
+ case WebAssembly::ARGUMENT_v4f32_S:
+ case WebAssembly::ARGUMENT_v2f64:
+ case WebAssembly::ARGUMENT_v2f64_S:
// These represent values which are live into the function entry, so there's
// no instruction to emit.
break;
case WebAssembly::FALLTHROUGH_RETURN_I32:
+ case WebAssembly::FALLTHROUGH_RETURN_I32_S:
case WebAssembly::FALLTHROUGH_RETURN_I64:
+ case WebAssembly::FALLTHROUGH_RETURN_I64_S:
case WebAssembly::FALLTHROUGH_RETURN_F32:
+ case WebAssembly::FALLTHROUGH_RETURN_F32_S:
case WebAssembly::FALLTHROUGH_RETURN_F64:
+ case WebAssembly::FALLTHROUGH_RETURN_F64_S:
case WebAssembly::FALLTHROUGH_RETURN_v16i8:
+ case WebAssembly::FALLTHROUGH_RETURN_v16i8_S:
case WebAssembly::FALLTHROUGH_RETURN_v8i16:
+ case WebAssembly::FALLTHROUGH_RETURN_v8i16_S:
case WebAssembly::FALLTHROUGH_RETURN_v4i32:
- case WebAssembly::FALLTHROUGH_RETURN_v4f32: {
+ case WebAssembly::FALLTHROUGH_RETURN_v4i32_S:
+ case WebAssembly::FALLTHROUGH_RETURN_v2i64:
+ case WebAssembly::FALLTHROUGH_RETURN_v2i64_S:
+ case WebAssembly::FALLTHROUGH_RETURN_v4f32:
+ case WebAssembly::FALLTHROUGH_RETURN_v4f32_S:
+ case WebAssembly::FALLTHROUGH_RETURN_v2f64:
+ case WebAssembly::FALLTHROUGH_RETURN_v2f64_S: {
// These instructions represent the implicit return at the end of a
- // function body. The operand is always a pop.
- assert(MFI->isVRegStackified(MI->getOperand(0).getReg()));
-
+ // function body. Always pops one value off the stack.
if (isVerbose()) {
- OutStreamer->AddComment("fallthrough-return: $pop" +
- Twine(MFI->getWARegStackId(
- MFI->getWAReg(MI->getOperand(0).getReg()))));
+ OutStreamer->AddComment("fallthrough-return-value");
OutStreamer->AddBlankLine();
}
break;
}
case WebAssembly::FALLTHROUGH_RETURN_VOID:
+ case WebAssembly::FALLTHROUGH_RETURN_VOID_S:
// This instruction represents the implicit return at the end of a
// function body with no return value.
if (isVerbose()) {
- OutStreamer->AddComment("fallthrough-return");
+ OutStreamer->AddComment("fallthrough-return-void");
OutStreamer->AddBlankLine();
}
break;
@@ -244,6 +287,9 @@ bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
OS << MO.getImm();
return false;
case MachineOperand::MO_Register:
+    // FIXME: INLINEASM is the only opcode that still contains registers, as
+    // required by MachineInstr::getDebugVariable().
+ assert(MI->getOpcode() == WebAssembly::INLINEASM);
OS << regToString(MO);
return false;
case MachineOperand::MO_GlobalAddress:
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
index 23817b4e5126..f6cb5610bad3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -25,18 +25,23 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter {
const WebAssemblySubtarget *Subtarget;
const MachineRegisterInfo *MRI;
WebAssemblyFunctionInfo *MFI;
+ // TODO: Do the uniquing of Signatures here instead of ObjectFileWriter?
+ std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
public:
explicit WebAssemblyAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)),
- Subtarget(nullptr), MRI(nullptr), MFI(nullptr) {}
+ : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr), MRI(nullptr),
+ MFI(nullptr) {}
StringRef getPassName() const override {
return "WebAssembly Assembly Printer";
}
const WebAssemblySubtarget &getSubtarget() const { return *Subtarget; }
+ void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
+ Signatures.push_back(std::move(Sig));
+ }
//===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index 267a51433cd1..fc827e9d5780 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -11,14 +11,15 @@
/// This file implements a CFG sorting pass.
///
/// This pass reorders the blocks in a function to put them into topological
-/// order, ignoring loop backedges, and without any loop being interrupted
-/// by a block not dominated by the loop header, with special care to keep the
-/// order as similar as possible to the original order.
+/// order, ignoring loop backedges, and without any loop or exception being
+/// interrupted by a block not dominated by its header, with special care
+/// to keep the order as similar as possible to the original order.
///
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
#include "llvm/ADT/PriorityQueue.h"
@@ -35,6 +36,73 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-cfg-sort"
namespace {
+
+// Wrapper for loops and exceptions
+class Region {
+public:
+ virtual ~Region() = default;
+ virtual MachineBasicBlock *getHeader() const = 0;
+ virtual bool contains(const MachineBasicBlock *MBB) const = 0;
+ virtual unsigned getNumBlocks() const = 0;
+ using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
+ virtual iterator_range<block_iterator> blocks() const = 0;
+ virtual bool isLoop() const = 0;
+};
+
+template <typename T> class ConcreteRegion : public Region {
+ const T *Region;
+
+public:
+ ConcreteRegion(const T *Region) : Region(Region) {}
+ MachineBasicBlock *getHeader() const override { return Region->getHeader(); }
+ bool contains(const MachineBasicBlock *MBB) const override {
+ return Region->contains(MBB);
+ }
+ unsigned getNumBlocks() const override { return Region->getNumBlocks(); }
+ iterator_range<block_iterator> blocks() const override {
+ return Region->blocks();
+ }
+ bool isLoop() const override { return false; }
+};
+
+template <> bool ConcreteRegion<MachineLoop>::isLoop() const { return true; }
+
+// This class holds information about nested Regions; it is analogous to what
+// LoopInfo is for loops.
+class RegionInfo {
+ const MachineLoopInfo &MLI;
+ const WebAssemblyExceptionInfo &WEI;
+ std::vector<const Region *> Regions;
+ DenseMap<const MachineLoop *, std::unique_ptr<Region>> LoopMap;
+ DenseMap<const WebAssemblyException *, std::unique_ptr<Region>> ExceptionMap;
+
+public:
+ RegionInfo(const MachineLoopInfo &MLI, const WebAssemblyExceptionInfo &WEI)
+ : MLI(MLI), WEI(WEI) {}
+
+  // Returns the smallest loop or exception that contains MBB
+ const Region *getRegionFor(const MachineBasicBlock *MBB) {
+ const auto *ML = MLI.getLoopFor(MBB);
+ const auto *WE = WEI.getExceptionFor(MBB);
+ if (!ML && !WE)
+ return nullptr;
+ if ((ML && !WE) || (ML && WE && ML->getNumBlocks() < WE->getNumBlocks())) {
+ // If the smallest region containing MBB is a loop
+ if (LoopMap.count(ML))
+ return LoopMap[ML].get();
+ LoopMap[ML] = llvm::make_unique<ConcreteRegion<MachineLoop>>(ML);
+ return LoopMap[ML].get();
+ } else {
+ // If the smallest region containing MBB is an exception
+ if (ExceptionMap.count(WE))
+ return ExceptionMap[WE].get();
+ ExceptionMap[WE] =
+ llvm::make_unique<ConcreteRegion<WebAssemblyException>>(WE);
+ return ExceptionMap[WE].get();
+ }
+ }
+};
+
class WebAssemblyCFGSort final : public MachineFunctionPass {
StringRef getPassName() const override { return "WebAssembly CFG Sort"; }
@@ -44,6 +112,8 @@ class WebAssemblyCFGSort final : public MachineFunctionPass {
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<WebAssemblyExceptionInfo>();
+ AU.addPreserved<WebAssemblyExceptionInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -81,10 +151,48 @@ static void MaybeUpdateTerminator(MachineBasicBlock *MBB) {
}
namespace {
+// EH pads are selected first regardless of the block comparison order.
+// When only one of the BBs is an EH pad, we give a higher priority to it, to
+// prevent common mismatches between possibly throwing calls and the EH pads
+// they unwind to, as in the example below:
+//
+// bb0:
+// call @foo // If this throws, unwind to bb2
+// bb1:
+// call @bar // If this throws, unwind to bb3
+// bb2 (ehpad):
+// handler_bb2
+// bb3 (ehpad):
+// handler_bb3
+// continuing code
+//
+// Because this pass tries to preserve the original BB order, this order will
+// not change. But that order then produces the following try-catch structure
+// in CFGStackify, resulting in a mismatch:
+// try
+// try
+// call @foo
+// call @bar // This should unwind to bb3, not bb2!
+// catch
+// handler_bb2
+// end
+// catch
+// handler_bb3
+// end
+// continuing code
+//
+// If we give a higher priority to an EH pad whenever it is ready in this
+// example, when both bb1 and bb2 are ready, we would pick up bb2 first.
+
/// Sort blocks by their number.
struct CompareBlockNumbers {
bool operator()(const MachineBasicBlock *A,
const MachineBasicBlock *B) const {
+ if (A->isEHPad() && !B->isEHPad())
+ return false;
+ if (!A->isEHPad() && B->isEHPad())
+ return true;
+
return A->getNumber() > B->getNumber();
}
};
@@ -92,29 +200,36 @@ struct CompareBlockNumbers {
struct CompareBlockNumbersBackwards {
bool operator()(const MachineBasicBlock *A,
const MachineBasicBlock *B) const {
+ // We give a higher priority to an EH pad
+ if (A->isEHPad() && !B->isEHPad())
+ return false;
+ if (!A->isEHPad() && B->isEHPad())
+ return true;
+
return A->getNumber() < B->getNumber();
}
};
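Both comparators rely on std::priority_queue popping the element that compares greatest under the supplied "less" predicate: answering "A > B" makes the lowest-numbered block pop first, and answering false whenever A is an EH pad (and B is not) keeps EH pads above everything else. A toy model with plain ints, illustrative only:

    #include <cassert>
    #include <queue>
    #include <vector>

    // With a comparator that answers "A > B", the priority queue treats the
    // smallest value as the greatest element, so it pops lowest numbers first.
    struct LowestFirst {
      bool operator()(int A, int B) const { return A > B; }
    };

    int main() {
      std::priority_queue<int, std::vector<int>, LowestFirst> Q;
      Q.push(30);
      Q.push(10);
      Q.push(20);
      assert(Q.top() == 10); // mirrors picking the lowest-numbered ready block
      return 0;
    }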
-/// Bookkeeping for a loop to help ensure that we don't mix blocks not dominated
-/// by the loop header among the loop's blocks.
+/// Bookkeeping for a region to help ensure that we don't mix blocks not
+/// dominated by its header among its blocks.
struct Entry {
- const MachineLoop *Loop;
+ const Region *TheRegion;
unsigned NumBlocksLeft;
/// List of blocks not dominated by Loop's header that are deferred until
/// after all of Loop's blocks have been seen.
std::vector<MachineBasicBlock *> Deferred;
- explicit Entry(const MachineLoop *L)
- : Loop(L), NumBlocksLeft(L->getNumBlocks()) {}
+ explicit Entry(const class Region *R)
+ : TheRegion(R), NumBlocksLeft(R->getNumBlocks()) {}
};
} // end anonymous namespace
-/// Sort the blocks, taking special care to make sure that loops are not
+/// Sort the blocks, taking special care to make sure that regions are not
/// interrupted by blocks not dominated by their header.
/// TODO: There are many opportunities for improving the heuristics here.
/// Explore them.
static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
+ const WebAssemblyExceptionInfo &WEI,
const MachineDominatorTree &MDT) {
// Prepare for a topological sort: Record the number of predecessors each
// block has, ignoring loop backedges.
@@ -131,35 +246,39 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
}
// Topological sort the CFG, with additional constraints:
- // - Between a loop header and the last block in the loop, there can be
- // no blocks not dominated by the loop header.
+ // - Between a region header and the last block in the region, there can be
+ // no blocks not dominated by its header.
// - It's desirable to preserve the original block order when possible.
// We use two ready lists; Preferred and Ready. Preferred has recently
// processed successors, to help preserve block sequences from the original
- // order. Ready has the remaining ready blocks.
+ // order. Ready has the remaining ready blocks. EH blocks are picked first
+ // from both queues.
PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
CompareBlockNumbers>
Preferred;
PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
CompareBlockNumbersBackwards>
Ready;
- SmallVector<Entry, 4> Loops;
+
+ RegionInfo SUI(MLI, WEI);
+ SmallVector<Entry, 4> Entries;
for (MachineBasicBlock *MBB = &MF.front();;) {
- const MachineLoop *L = MLI.getLoopFor(MBB);
- if (L) {
- // If MBB is a loop header, add it to the active loop list. We can't put
- // any blocks that it doesn't dominate until we see the end of the loop.
- if (L->getHeader() == MBB)
- Loops.push_back(Entry(L));
- // For each active loop the block is in, decrement the count. If MBB is
- // the last block in an active loop, take it off the list and pick up any
- // blocks deferred because the header didn't dominate them.
- for (Entry &E : Loops)
- if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0)
+ const Region *R = SUI.getRegionFor(MBB);
+ if (R) {
+ // If MBB is a region header, add it to the active region list. We can't
+ // put any blocks that it doesn't dominate until we see the end of the
+ // region.
+ if (R->getHeader() == MBB)
+ Entries.push_back(Entry(R));
+ // For each active region the block is in, decrement the count. If MBB is
+ // the last block in an active region, take it off the list and pick up
+ // any blocks deferred because the header didn't dominate them.
+ for (Entry &E : Entries)
+ if (E.TheRegion->contains(MBB) && --E.NumBlocksLeft == 0)
for (auto DeferredBlock : E.Deferred)
Ready.push(DeferredBlock);
- while (!Loops.empty() && Loops.back().NumBlocksLeft == 0)
- Loops.pop_back();
+ while (!Entries.empty() && Entries.back().NumBlocksLeft == 0)
+ Entries.pop_back();
}
// The main topological sort logic.
for (MachineBasicBlock *Succ : MBB->successors()) {
@@ -177,19 +296,19 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
while (!Preferred.empty()) {
Next = Preferred.top();
Preferred.pop();
- // If X isn't dominated by the top active loop header, defer it until that
- // loop is done.
- if (!Loops.empty() &&
- !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
- Loops.back().Deferred.push_back(Next);
+ // If X isn't dominated by the top active region header, defer it until
+ // that region is done.
+ if (!Entries.empty() &&
+ !MDT.dominates(Entries.back().TheRegion->getHeader(), Next)) {
+ Entries.back().Deferred.push_back(Next);
Next = nullptr;
continue;
}
// If Next was originally ordered before MBB, and it isn't because it was
// loop-rotated above the header, it's not preferred.
if (Next->getNumber() < MBB->getNumber() &&
- (!L || !L->contains(Next) ||
- L->getHeader()->getNumber() < Next->getNumber())) {
+ (!R || !R->contains(Next) ||
+ R->getHeader()->getNumber() < Next->getNumber())) {
Ready.push(Next);
Next = nullptr;
continue;
@@ -207,11 +326,11 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
for (;;) {
Next = Ready.top();
Ready.pop();
- // If Next isn't dominated by the top active loop header, defer it until
- // that loop is done.
- if (!Loops.empty() &&
- !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
- Loops.back().Deferred.push_back(Next);
+ // If Next isn't dominated by the top active region header, defer it
+ // until that region is done.
+ if (!Entries.empty() &&
+ !MDT.dominates(Entries.back().TheRegion->getHeader(), Next)) {
+ Entries.back().Deferred.push_back(Next);
continue;
}
break;
@@ -222,11 +341,11 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
MaybeUpdateTerminator(MBB);
MBB = Next;
}
- assert(Loops.empty() && "Active loop list not finished");
+ assert(Entries.empty() && "Active sort region list not finished");
MF.RenumberBlocks();
#ifndef NDEBUG
- SmallSetVector<MachineLoop *, 8> OnStack;
+ SmallSetVector<const Region *, 8> OnStack;
// Insert a sentinel representing the degenerate loop that starts at the
// function entry block and includes the entire function as a "loop" that
@@ -235,29 +354,39 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
for (auto &MBB : MF) {
assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
+ const Region *Region = SUI.getRegionFor(&MBB);
+
+ if (Region && &MBB == Region->getHeader()) {
+ if (Region->isLoop()) {
+ // Loop header. The loop predecessor should be sorted above, and the
+ // other predecessors should be backedges below.
+ for (auto Pred : MBB.predecessors())
+ assert(
+ (Pred->getNumber() < MBB.getNumber() || Region->contains(Pred)) &&
+ "Loop header predecessors must be loop predecessors or "
+ "backedges");
+ } else {
+ // Not a loop header. All predecessors should be sorted above.
+ for (auto Pred : MBB.predecessors())
+ assert(Pred->getNumber() < MBB.getNumber() &&
+ "Non-loop-header predecessors should be topologically sorted");
+ }
+ assert(OnStack.insert(Region) &&
+ "Regions should be declared at most once.");
- MachineLoop *Loop = MLI.getLoopFor(&MBB);
- if (Loop && &MBB == Loop->getHeader()) {
- // Loop header. The loop predecessor should be sorted above, and the other
- // predecessors should be backedges below.
- for (auto Pred : MBB.predecessors())
- assert(
- (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
- "Loop header predecessors must be loop predecessors or backedges");
- assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
} else {
// Not a loop header. All predecessors should be sorted above.
for (auto Pred : MBB.predecessors())
assert(Pred->getNumber() < MBB.getNumber() &&
"Non-loop-header predecessors should be topologically sorted");
- assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
- "Blocks must be nested in their loops");
+ assert(OnStack.count(SUI.getRegionFor(&MBB)) &&
+ "Blocks must be nested in their regions");
}
while (OnStack.size() > 1 && &MBB == WebAssembly::getBottom(OnStack.back()))
OnStack.pop_back();
}
assert(OnStack.pop_back_val() == nullptr &&
- "The function entry block shouldn't actually be a loop header");
+ "The function entry block shouldn't actually be a region header");
assert(OnStack.empty() &&
"Control flow stack pushes and pops should be balanced.");
#endif
@@ -269,12 +398,13 @@ bool WebAssemblyCFGSort::runOnMachineFunction(MachineFunction &MF) {
<< MF.getName() << '\n');
const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
auto &MDT = getAnalysis<MachineDominatorTree>();
// Liveness is not tracked for VALUE_STACK physreg.
MF.getRegInfo().invalidateLiveness();
- // Sort the blocks, with contiguous loops.
- SortBlocks(MF, MLI, MDT);
+ // Sort the blocks, with contiguous sort regions.
+ SortBlocks(MF, MLI, WEI, MDT);
return true;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 70ce40cefed7..f8f5f4040c86 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -10,16 +10,21 @@
/// \file
/// This file implements a CFG stacking pass.
///
-/// This pass inserts BLOCK and LOOP markers to mark the start of scopes, since
-/// scope boundaries serve as the labels for WebAssembly's control transfers.
+/// This pass inserts BLOCK, LOOP, and TRY markers to mark the start of scopes,
+/// since scope boundaries serve as the labels for WebAssembly's control
+/// transfers.
///
/// This is sufficient to convert arbitrary CFGs into a form that works on
/// WebAssembly, provided that all loops are single-entry.
///
+/// In case we use exceptions, this pass also fixes mismatches in unwind
+/// destinations created during transforming CFG into wasm structured format.
+///
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
@@ -29,6 +34,8 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -40,26 +47,57 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass {
StringRef getPassName() const override { return "WebAssembly CFG Stackify"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<WebAssemblyExceptionInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) override;
+ // For each block whose label represents the end of a scope, record the block
+ // which holds the beginning of the scope. This will allow us to quickly skip
+ // over scoped regions when walking blocks.
+ SmallVector<MachineBasicBlock *, 8> ScopeTops;
+
+ void placeMarkers(MachineFunction &MF);
+ void placeBlockMarker(MachineBasicBlock &MBB);
+ void placeLoopMarker(MachineBasicBlock &MBB);
+ void placeTryMarker(MachineBasicBlock &MBB);
+ void rewriteDepthImmediates(MachineFunction &MF);
+ void fixEndsAtEndOfFunction(MachineFunction &MF);
+
+ // For each BLOCK|LOOP|TRY, the corresponding END_(BLOCK|LOOP|TRY).
+ DenseMap<const MachineInstr *, MachineInstr *> BeginToEnd;
+ // For each END_(BLOCK|LOOP|TRY), the corresponding BLOCK|LOOP|TRY.
+ DenseMap<const MachineInstr *, MachineInstr *> EndToBegin;
+ // <TRY marker, EH pad> map
+ DenseMap<const MachineInstr *, MachineBasicBlock *> TryToEHPad;
+ // <EH pad, TRY marker> map
+ DenseMap<const MachineBasicBlock *, MachineInstr *> EHPadToTry;
+ // <LOOP|TRY marker, Loop/exception bottom BB> map
+ DenseMap<const MachineInstr *, MachineBasicBlock *> BeginToBottom;
+
+ // Helper functions to register scope information created by marker
+ // instructions.
+ void registerScope(MachineInstr *Begin, MachineInstr *End);
+ void registerTryScope(MachineInstr *Begin, MachineInstr *End,
+ MachineBasicBlock *EHPad);
+
+ MachineBasicBlock *getBottom(const MachineInstr *Begin);
+
public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyCFGStackify() : MachineFunctionPass(ID) {}
+ ~WebAssemblyCFGStackify() override { releaseMemory(); }
+ void releaseMemory() override;
};
} // end anonymous namespace
char WebAssemblyCFGStackify::ID = 0;
INITIALIZE_PASS(WebAssemblyCFGStackify, DEBUG_TYPE,
- "Insert BLOCK and LOOP markers for WebAssembly scopes",
- false, false)
+ "Insert BLOCK and LOOP markers for WebAssembly scopes", false,
+ false)
FunctionPass *llvm::createWebAssemblyCFGStackify() {
return new WebAssemblyCFGStackify();
@@ -73,34 +111,121 @@ FunctionPass *llvm::createWebAssemblyCFGStackify() {
static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
MachineBasicBlock *MBB) {
for (MachineInstr &MI : Pred->terminators())
- for (MachineOperand &MO : MI.explicit_operands())
- if (MO.isMBB() && MO.getMBB() == MBB)
- return true;
+ // Even if a rethrow takes a BB argument, it is not a branch
+ if (!WebAssembly::isRethrow(MI))
+ for (MachineOperand &MO : MI.explicit_operands())
+ if (MO.isMBB() && MO.getMBB() == MBB)
+ return true;
return false;
}
+// Returns an iterator to the earliest position possible within the MBB,
+// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
+// contains instructions that should go before the marker, and AfterSet contains
+// ones that should go after the marker. In this function, AfterSet is only
+// used for sanity checking.
+static MachineBasicBlock::iterator
+GetEarliestInsertPos(MachineBasicBlock *MBB,
+ const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
+ const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
+ auto InsertPos = MBB->end();
+ while (InsertPos != MBB->begin()) {
+ if (BeforeSet.count(&*std::prev(InsertPos))) {
+#ifndef NDEBUG
+ // Sanity check
+ for (auto Pos = InsertPos, E = MBB->begin(); Pos != E; --Pos)
+ assert(!AfterSet.count(&*std::prev(Pos)));
+#endif
+ break;
+ }
+ --InsertPos;
+ }
+ return InsertPos;
+}
+
+// Returns an iterator to the latest position possible within the MBB,
+// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
+// contains instructions that should go before the marker, and AfterSet contains
+// ones that should go after the marker. In this function, BeforeSet is only
+// used for sanity checking.
+static MachineBasicBlock::iterator
+GetLatestInsertPos(MachineBasicBlock *MBB,
+ const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
+ const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
+ auto InsertPos = MBB->begin();
+ while (InsertPos != MBB->end()) {
+ if (AfterSet.count(&*InsertPos)) {
+#ifndef NDEBUG
+ // Sanity check
+ for (auto Pos = InsertPos, E = MBB->end(); Pos != E; ++Pos)
+ assert(!BeforeSet.count(&*Pos));
+#endif
+ break;
+ }
+ ++InsertPos;
+ }
+ return InsertPos;
+}
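Both helpers are linear scans bounded by the constraint sets: the latest legal slot sits immediately before the first instruction that must follow the marker, and the earliest legal slot sits immediately after the last instruction that must precede it. A toy model of GetLatestInsertPos with ints standing in for instructions, illustrative only:

    #include <cassert>
    #include <set>
    #include <vector>

    int main() {
      std::vector<int> Block = {1, 2, 3, 4}; // stand-in "instructions"
      std::set<int> AfterSet = {3, 4};       // must come after the marker

      // Advance until the first element that must follow the marker; that is
      // the latest position where the marker may still be inserted.
      auto Pos = Block.begin();
      while (Pos != Block.end() && !AfterSet.count(*Pos))
        ++Pos;
      Block.insert(Pos, /*Marker=*/99);

      assert((Block == std::vector<int>{1, 2, 99, 3, 4}));
      return 0;
    }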
+
+void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin,
+ MachineInstr *End) {
+ BeginToEnd[Begin] = End;
+ EndToBegin[End] = Begin;
+}
+
+void WebAssemblyCFGStackify::registerTryScope(MachineInstr *Begin,
+ MachineInstr *End,
+ MachineBasicBlock *EHPad) {
+ registerScope(Begin, End);
+ TryToEHPad[Begin] = EHPad;
+ EHPadToTry[EHPad] = Begin;
+}
+
+// Given a LOOP/TRY marker, returns its bottom BB. Uses cached information,
+// if any, to avoid recomputation.
+MachineBasicBlock *
+WebAssemblyCFGStackify::getBottom(const MachineInstr *Begin) {
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
+ if (BeginToBottom.count(Begin))
+ return BeginToBottom[Begin];
+ if (Begin->getOpcode() == WebAssembly::LOOP) {
+ MachineLoop *L = MLI.getLoopFor(Begin->getParent());
+ assert(L);
+ BeginToBottom[Begin] = WebAssembly::getBottom(L);
+ } else if (Begin->getOpcode() == WebAssembly::TRY) {
+ WebAssemblyException *WE = WEI.getExceptionFor(TryToEHPad[Begin]);
+ assert(WE);
+ BeginToBottom[Begin] = WebAssembly::getBottom(WE);
+ } else
+ assert(false);
+ return BeginToBottom[Begin];
+}
+
/// Insert a BLOCK marker for branches to MBB (if needed).
-static void PlaceBlockMarker(
- MachineBasicBlock &MBB, MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
- DenseMap<const MachineInstr *, MachineInstr *> &BlockTops,
- DenseMap<const MachineInstr *, MachineInstr *> &LoopTops,
- const WebAssemblyInstrInfo &TII,
- const MachineLoopInfo &MLI,
- MachineDominatorTree &MDT,
- WebAssemblyFunctionInfo &MFI) {
+void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
+ // This should have been handled in placeTryMarker.
+ if (MBB.isEHPad())
+ return;
+
+ MachineFunction &MF = *MBB.getParent();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
// First compute the nearest common dominator of all forward non-fallthrough
// predecessors so that we minimize the time that the BLOCK is on the stack,
// which reduces overall stack height.
MachineBasicBlock *Header = nullptr;
bool IsBranchedTo = false;
int MBBNumber = MBB.getNumber();
- for (MachineBasicBlock *Pred : MBB.predecessors())
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
if (Pred->getNumber() < MBBNumber) {
Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
if (ExplicitlyBranchesTo(Pred, &MBB))
IsBranchedTo = true;
}
+ }
if (!Header)
return;
if (!IsBranchedTo)
@@ -125,43 +250,93 @@ static void PlaceBlockMarker(
}
// Decide where in Header to put the BLOCK.
- MachineBasicBlock::iterator InsertPos;
- MachineLoop *HeaderLoop = MLI.getLoopFor(Header);
- if (HeaderLoop &&
- MBB.getNumber() > WebAssembly::getBottom(HeaderLoop)->getNumber()) {
- // Header is the header of a loop that does not lexically contain MBB, so
- // the BLOCK needs to be above the LOOP, after any END constructs.
- InsertPos = Header->begin();
- while (InsertPos->getOpcode() == WebAssembly::END_BLOCK ||
- InsertPos->getOpcode() == WebAssembly::END_LOOP)
- ++InsertPos;
- } else {
- // Otherwise, insert the BLOCK as late in Header as we can, but before the
- // beginning of the local expression tree and any nested BLOCKs.
- InsertPos = Header->getFirstTerminator();
- while (InsertPos != Header->begin() &&
- WebAssembly::isChild(*std::prev(InsertPos), MFI) &&
- std::prev(InsertPos)->getOpcode() != WebAssembly::LOOP &&
- std::prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK &&
- std::prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP)
- --InsertPos;
+
+ // Instructions that should go before the BLOCK.
+ SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+ // Instructions that should go after the BLOCK.
+ SmallPtrSet<const MachineInstr *, 4> AfterSet;
+ for (const auto &MI : *Header) {
+ // If there is a previously placed LOOP/TRY marker and the bottom block of
+ // the loop/exception is above MBB, it should be after the BLOCK, because
+ // the loop/exception is nested in this block. Otherwise it should be before
+ // the BLOCK.
+ if (MI.getOpcode() == WebAssembly::LOOP ||
+ MI.getOpcode() == WebAssembly::TRY) {
+ if (MBB.getNumber() > getBottom(&MI)->getNumber())
+ AfterSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ BeforeSet.insert(&MI);
+#endif
+ }
+
+ // All previously inserted BLOCK markers should be after the BLOCK because
+ // they are all nested blocks.
+ if (MI.getOpcode() == WebAssembly::BLOCK)
+ AfterSet.insert(&MI);
+
+#ifndef NDEBUG
+ // All END_(BLOCK|LOOP|TRY) markers should be before the BLOCK.
+ if (MI.getOpcode() == WebAssembly::END_BLOCK ||
+ MI.getOpcode() == WebAssembly::END_LOOP ||
+ MI.getOpcode() == WebAssembly::END_TRY)
+ BeforeSet.insert(&MI);
+#endif
+
+ // Terminators should go after the BLOCK.
+ if (MI.isTerminator())
+ AfterSet.insert(&MI);
+ }
+
+ // Local expression tree should go after the BLOCK.
+ for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E;
+ --I) {
+ if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
+ continue;
+ if (WebAssembly::isChild(*std::prev(I), MFI))
+ AfterSet.insert(&*std::prev(I));
+ else
+ break;
}
// Add the BLOCK.
+ auto InsertPos = GetLatestInsertPos(Header, BeforeSet, AfterSet);
MachineInstr *Begin =
BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
TII.get(WebAssembly::BLOCK))
.addImm(int64_t(WebAssembly::ExprType::Void));
+ // Decide where in Header to put the END_BLOCK.
+ BeforeSet.clear();
+ AfterSet.clear();
+ for (auto &MI : MBB) {
+#ifndef NDEBUG
+ // END_BLOCK should precede existing LOOP and TRY markers.
+ if (MI.getOpcode() == WebAssembly::LOOP ||
+ MI.getOpcode() == WebAssembly::TRY)
+ AfterSet.insert(&MI);
+#endif
+
+ // If there is a previously placed END_LOOP marker and the header of the
+ // loop is above this block's header, the END_LOOP should be placed after
+ // the END_BLOCK, because the loop contains this block. Otherwise the
+ // END_LOOP should be placed before the END_BLOCK. The same for END_TRY.
+ if (MI.getOpcode() == WebAssembly::END_LOOP ||
+ MI.getOpcode() == WebAssembly::END_TRY) {
+ if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber())
+ BeforeSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ AfterSet.insert(&MI);
+#endif
+ }
+ }
+
// Mark the end of the block.
- InsertPos = MBB.begin();
- while (InsertPos != MBB.end() &&
- InsertPos->getOpcode() == WebAssembly::END_LOOP &&
- LoopTops[&*InsertPos]->getParent()->getNumber() >= Header->getNumber())
- ++InsertPos;
+ InsertPos = GetEarliestInsertPos(&MBB, BeforeSet, AfterSet);
MachineInstr *End = BuildMI(MBB, InsertPos, MBB.findPrevDebugLoc(InsertPos),
TII.get(WebAssembly::END_BLOCK));
- BlockTops[End] = Begin;
+ registerScope(Begin, End);
// Track the farthest-spanning scope that ends at this point.
int Number = MBB.getNumber();
@@ -171,11 +346,11 @@ static void PlaceBlockMarker(
}
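
The BLOCK/END_BLOCK placement above defers the actual position choice to GetLatestInsertPos and GetEarliestInsertPos, whose bodies fall outside these hunks. A minimal standalone sketch of the behaviour the call sites rely on, using a plain vector/int model rather than the LLVM types (the real helpers also take the complementary set so they can assert that every "before" instruction precedes every "after" instruction):

#include <cstdio>
#include <iterator>
#include <unordered_set>
#include <vector>

using Instr = int;                        // toy stand-in for MachineInstr *
using Block = std::vector<Instr>;         // toy stand-in for a basic block
using InstrSet = std::unordered_set<Instr>;

// Latest point that is still before everything in AfterSet: scan forward and
// stop at the first instruction that must follow the new marker.
static Block::const_iterator getLatestInsertPos(const Block &BB,
                                                const InstrSet &AfterSet) {
  auto Pos = BB.begin();
  while (Pos != BB.end() && !AfterSet.count(*Pos))
    ++Pos;
  return Pos;
}

// Earliest point that is still after everything in BeforeSet: scan backward
// and stop just past the last instruction that must precede the new marker.
static Block::const_iterator getEarliestInsertPos(const Block &BB,
                                                  const InstrSet &BeforeSet) {
  auto Pos = BB.end();
  while (Pos != BB.begin() && !BeforeSet.count(*std::prev(Pos)))
    --Pos;
  return Pos;
}

int main() {
  // 1 = END_LOOP of an enclosing scope, 2 = ordinary code, 3 = a nested LOOP
  // marker, 4 = a branch terminator.
  Block BB = {1, 2, 3, 4};
  InstrSet Before = {1};                  // must stay before the new marker
  InstrSet After = {3, 4};                // must stay after the new marker
  std::printf("latest insert index: %ld\n",
              (long)(getLatestInsertPos(BB, After) - BB.begin()));    // 2
  std::printf("earliest insert index: %ld\n",
              (long)(getEarliestInsertPos(BB, Before) - BB.begin())); // 1
}
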
/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header).
-static void PlaceLoopMarker(
- MachineBasicBlock &MBB, MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
- DenseMap<const MachineInstr *, MachineInstr *> &LoopTops,
- const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) {
+void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
MachineLoop *Loop = MLI.getLoopFor(&MBB);
if (!Loop || Loop->getHeader() != &MBB)
return;
@@ -193,22 +368,43 @@ static void PlaceLoopMarker(
}
MachineBasicBlock *AfterLoop = &*Iter;
- // Mark the beginning of the loop (after the end of any existing loop that
- // ends here).
- auto InsertPos = MBB.begin();
- while (InsertPos != MBB.end() &&
- InsertPos->getOpcode() == WebAssembly::END_LOOP)
- ++InsertPos;
+ // Decide where in Header to put the LOOP.
+ SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+ SmallPtrSet<const MachineInstr *, 4> AfterSet;
+ for (const auto &MI : MBB) {
+ // LOOP marker should be after any existing loop that ends here. Otherwise
+ // we assume the instruction belongs to the loop.
+ if (MI.getOpcode() == WebAssembly::END_LOOP)
+ BeforeSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ AfterSet.insert(&MI);
+#endif
+ }
+
+ // Mark the beginning of the loop.
+ auto InsertPos = GetEarliestInsertPos(&MBB, BeforeSet, AfterSet);
MachineInstr *Begin = BuildMI(MBB, InsertPos, MBB.findDebugLoc(InsertPos),
TII.get(WebAssembly::LOOP))
.addImm(int64_t(WebAssembly::ExprType::Void));
- // Mark the end of the loop (using arbitrary debug location that branched
- // to the loop end as its location).
+ // Decide where in AfterLoop to put the END_LOOP.
+ BeforeSet.clear();
+ AfterSet.clear();
+#ifndef NDEBUG
+ for (const auto &MI : MBB)
+ // Existing END_LOOP markers belong to parent loops of this loop
+ if (MI.getOpcode() == WebAssembly::END_LOOP)
+ AfterSet.insert(&MI);
+#endif
+
+ // Mark the end of the loop (using arbitrary debug location that branched to
+ // the loop end as its location).
+ InsertPos = GetEarliestInsertPos(AfterLoop, BeforeSet, AfterSet);
DebugLoc EndDL = (*AfterLoop->pred_rbegin())->findBranchDebugLoc();
- MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), EndDL,
- TII.get(WebAssembly::END_LOOP));
- LoopTops[End] = Begin;
+ MachineInstr *End =
+ BuildMI(*AfterLoop, InsertPos, EndDL, TII.get(WebAssembly::END_LOOP));
+ registerScope(Begin, End);
assert((!ScopeTops[AfterLoop->getNumber()] ||
ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
@@ -217,6 +413,183 @@ static void PlaceLoopMarker(
ScopeTops[AfterLoop->getNumber()] = &MBB;
}
+void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
+ if (!MBB.isEHPad())
+ return;
+
+ // catch_all terminate pad is grouped together with catch terminate pad and
+ // does not need a separate TRY and END_TRY marker.
+ if (WebAssembly::isCatchAllTerminatePad(MBB))
+ return;
+
+ MachineFunction &MF = *MBB.getParent();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
+ const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+ // Compute the nearest common dominator of all unwind predecessors
+ MachineBasicBlock *Header = nullptr;
+ int MBBNumber = MBB.getNumber();
+ for (auto *Pred : MBB.predecessors()) {
+ if (Pred->getNumber() < MBBNumber) {
+ Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
+ assert(!ExplicitlyBranchesTo(Pred, &MBB) &&
+ "Explicit branch to an EH pad!");
+ }
+ }
+ if (!Header)
+ return;
+
+ // If this try is at the bottom of the function, insert a dummy block at the
+ // end.
+ WebAssemblyException *WE = WEI.getExceptionFor(&MBB);
+ assert(WE);
+ MachineBasicBlock *Bottom = WebAssembly::getBottom(WE);
+
+ auto Iter = std::next(MachineFunction::iterator(Bottom));
+ if (Iter == MF.end()) {
+ MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
+ // Give it a fake predecessor so that AsmPrinter prints its label.
+ Label->addSuccessor(Label);
+ MF.push_back(Label);
+ Iter = std::next(MachineFunction::iterator(Bottom));
+ }
+ MachineBasicBlock *AfterTry = &*Iter;
+
+ assert(AfterTry != &MF.front());
+ MachineBasicBlock *LayoutPred =
+ &*std::prev(MachineFunction::iterator(AfterTry));
+
+ // If the nearest common dominator is inside a more deeply nested context,
+ // walk out to the nearest scope which isn't more deeply nested.
+ for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) {
+ if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
+ if (ScopeTop->getNumber() > Header->getNumber()) {
+ // Skip over an intervening scope.
+ I = std::next(MachineFunction::iterator(ScopeTop));
+ } else {
+ // We found a scope level at an appropriate depth.
+ Header = ScopeTop;
+ break;
+ }
+ }
+ }
+
+ // Decide where in Header to put the TRY.
+
+ // Instructions that should go before the TRY.
+ SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+ // Instructions that should go after the TRY.
+ SmallPtrSet<const MachineInstr *, 4> AfterSet;
+ for (const auto &MI : *Header) {
+ // If there is a previously placed LOOP marker and the bottom block of
+ // the loop is above MBB, the LOOP should be after the TRY, because the
+ // loop is nested in this try. Otherwise it should be before the TRY.
+ if (MI.getOpcode() == WebAssembly::LOOP) {
+ if (MBB.getNumber() > Bottom->getNumber())
+ AfterSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ BeforeSet.insert(&MI);
+#endif
+ }
+
+ // All previously inserted TRY markers should be after the TRY because they
+ // are all nested tries.
+ if (MI.getOpcode() == WebAssembly::TRY)
+ AfterSet.insert(&MI);
+
+#ifndef NDEBUG
+ // All END_(LOOP/TRY) markers should be before the TRY.
+ if (MI.getOpcode() == WebAssembly::END_LOOP ||
+ MI.getOpcode() == WebAssembly::END_TRY)
+ BeforeSet.insert(&MI);
+#endif
+
+ // Terminators should go after the TRY.
+ if (MI.isTerminator())
+ AfterSet.insert(&MI);
+ }
+
+ // Local expression tree should go after the TRY.
+ for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E;
+ --I) {
+ if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
+ continue;
+ if (WebAssembly::isChild(*std::prev(I), MFI))
+ AfterSet.insert(&*std::prev(I));
+ else
+ break;
+ }
+
+ // If Header unwinds to MBB (= Header contains 'invoke'), the try block should
+ // contain the call within it. So the call should go after the TRY. The
+ // exception is when the header's terminator is a rethrow instruction, in
+ // which case that instruction, not a call instruction before it, is going
+ // to throw.
+ if (MBB.isPredecessor(Header)) {
+ auto TermPos = Header->getFirstTerminator();
+ if (TermPos == Header->end() || !WebAssembly::isRethrow(*TermPos)) {
+ for (const auto &MI : reverse(*Header)) {
+ if (MI.isCall()) {
+ AfterSet.insert(&MI);
+ break;
+ }
+ }
+ }
+ }
+
+ // Add the TRY.
+ auto InsertPos = GetLatestInsertPos(Header, BeforeSet, AfterSet);
+ MachineInstr *Begin =
+ BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
+ TII.get(WebAssembly::TRY))
+ .addImm(int64_t(WebAssembly::ExprType::Void));
+
+ // Decide where in AfterTry to put the END_TRY.
+ BeforeSet.clear();
+ AfterSet.clear();
+ for (const auto &MI : *AfterTry) {
+#ifndef NDEBUG
+ // END_TRY should precede existing LOOP markers.
+ if (MI.getOpcode() == WebAssembly::LOOP)
+ AfterSet.insert(&MI);
+
+ // All END_TRY markers placed earlier belong to exceptions that contain
+ // this one.
+ if (MI.getOpcode() == WebAssembly::END_TRY)
+ AfterSet.insert(&MI);
+#endif
+
+ // If there is a previously placed END_LOOP marker and its header is after
+ // where the TRY marker is, this loop is contained within the 'catch' part, so
+ // the END_TRY marker should go after that. Otherwise, the whole try-catch
+ // is contained within this loop, so the END_TRY should go before that.
+ if (MI.getOpcode() == WebAssembly::END_LOOP) {
+ if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber())
+ BeforeSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ AfterSet.insert(&MI);
+#endif
+ }
+ }
+
+ // Mark the end of the TRY.
+ InsertPos = GetEarliestInsertPos(AfterTry, BeforeSet, AfterSet);
+ MachineInstr *End =
+ BuildMI(*AfterTry, InsertPos, Bottom->findBranchDebugLoc(),
+ TII.get(WebAssembly::END_TRY));
+ registerTryScope(Begin, End, &MBB);
+
+ // Track the farthest-spanning scope that ends at this point.
+ int Number = AfterTry->getNumber();
+ if (!ScopeTops[Number] ||
+ ScopeTops[Number]->getNumber() > Header->getNumber())
+ ScopeTops[Number] = Header;
+}
+
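
registerScope and registerTryScope are called above but defined outside these hunks. Judging from the maps cleared in releaseMemory further down, they presumably do no more than record the begin/end pairing (and, for TRY, the associated EH pad); a sketch under that assumption:

// Sketch only; the exact bodies are not part of this hunk.
void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin,
                                           MachineInstr *End) {
  BeginToEnd[Begin] = End;
  EndToBegin[End] = Begin;
}

void WebAssemblyCFGStackify::registerTryScope(MachineInstr *Begin,
                                              MachineInstr *End,
                                              MachineBasicBlock *EHPad) {
  registerScope(Begin, End);
  TryToEHPad[Begin] = EHPad;
  EHPadToTry[EHPad] = Begin;
}
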
static unsigned
GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
const MachineBasicBlock *MBB) {
@@ -237,11 +610,8 @@ GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
/// that end at the function end need to have a return type signature that
/// matches the function signature, even though it's unreachable. This function
/// checks for such cases and fixes up the signatures.
-static void FixEndsAtEndOfFunction(
- MachineFunction &MF,
- const WebAssemblyFunctionInfo &MFI,
- DenseMap<const MachineInstr *, MachineInstr *> &BlockTops,
- DenseMap<const MachineInstr *, MachineInstr *> &LoopTops) {
+void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
+ const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
assert(MFI.getResults().size() <= 1);
if (MFI.getResults().empty())
@@ -249,16 +619,31 @@ static void FixEndsAtEndOfFunction(
WebAssembly::ExprType retType;
switch (MFI.getResults().front().SimpleTy) {
- case MVT::i32: retType = WebAssembly::ExprType::I32; break;
- case MVT::i64: retType = WebAssembly::ExprType::I64; break;
- case MVT::f32: retType = WebAssembly::ExprType::F32; break;
- case MVT::f64: retType = WebAssembly::ExprType::F64; break;
- case MVT::v16i8: retType = WebAssembly::ExprType::I8x16; break;
- case MVT::v8i16: retType = WebAssembly::ExprType::I16x8; break;
- case MVT::v4i32: retType = WebAssembly::ExprType::I32x4; break;
- case MVT::v4f32: retType = WebAssembly::ExprType::F32x4; break;
- case MVT::ExceptRef: retType = WebAssembly::ExprType::ExceptRef; break;
- default: llvm_unreachable("unexpected return type");
+ case MVT::i32:
+ retType = WebAssembly::ExprType::I32;
+ break;
+ case MVT::i64:
+ retType = WebAssembly::ExprType::I64;
+ break;
+ case MVT::f32:
+ retType = WebAssembly::ExprType::F32;
+ break;
+ case MVT::f64:
+ retType = WebAssembly::ExprType::F64;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ retType = WebAssembly::ExprType::V128;
+ break;
+ case MVT::ExceptRef:
+ retType = WebAssembly::ExprType::ExceptRef;
+ break;
+ default:
+ llvm_unreachable("unexpected return type");
}
for (MachineBasicBlock &MBB : reverse(MF)) {
@@ -266,11 +651,11 @@ static void FixEndsAtEndOfFunction(
if (MI.isPosition() || MI.isDebugInstr())
continue;
if (MI.getOpcode() == WebAssembly::END_BLOCK) {
- BlockTops[&MI]->getOperand(0).setImm(int32_t(retType));
+ EndToBegin[&MI]->getOperand(0).setImm(int32_t(retType));
continue;
}
if (MI.getOpcode() == WebAssembly::END_LOOP) {
- LoopTops[&MI]->getOperand(0).setImm(int32_t(retType));
+ EndToBegin[&MI]->getOperand(0).setImm(int32_t(retType));
continue;
}
// Something other than an `end`. We're done.
@@ -281,60 +666,108 @@ static void FixEndsAtEndOfFunction(
// WebAssembly functions end with an end instruction, as if the function body
// were a block.
-static void AppendEndToFunction(
- MachineFunction &MF,
- const WebAssemblyInstrInfo &TII) {
+static void AppendEndToFunction(MachineFunction &MF,
+ const WebAssemblyInstrInfo &TII) {
BuildMI(MF.back(), MF.back().end(),
MF.back().findPrevDebugLoc(MF.back().end()),
TII.get(WebAssembly::END_FUNCTION));
}
-/// Insert LOOP and BLOCK markers at appropriate places.
-static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
- const WebAssemblyInstrInfo &TII,
- MachineDominatorTree &MDT,
- WebAssemblyFunctionInfo &MFI) {
- // For each block whose label represents the end of a scope, record the block
- // which holds the beginning of the scope. This will allow us to quickly skip
- // over scoped regions when walking blocks. We allocate one more than the
- // number of blocks in the function to accommodate for the possible fake block
- // we may insert at the end.
- SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
-
- // For each LOOP_END, the corresponding LOOP.
- DenseMap<const MachineInstr *, MachineInstr *> LoopTops;
-
- // For each END_BLOCK, the corresponding BLOCK.
- DenseMap<const MachineInstr *, MachineInstr *> BlockTops;
-
- for (auto &MBB : MF) {
- // Place the LOOP for MBB if MBB is the header of a loop.
- PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI);
-
- // Place the BLOCK for MBB if MBB is branched to from above.
- PlaceBlockMarker(MBB, MF, ScopeTops, BlockTops, LoopTops, TII, MLI, MDT, MFI);
- }
+/// Insert LOOP/TRY/BLOCK markers at appropriate places.
+void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) {
+ const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
+ // We allocate one more than the number of blocks in the function to
+ // accommodate for the possible fake block we may insert at the end.
+ ScopeTops.resize(MF.getNumBlockIDs() + 1);
+ // Place the LOOP for MBB if MBB is the header of a loop.
+ for (auto &MBB : MF)
+ placeLoopMarker(MBB);
+ // Place the TRY for MBB if MBB is the EH pad of an exception.
+ if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+ MF.getFunction().hasPersonalityFn())
+ for (auto &MBB : MF)
+ placeTryMarker(MBB);
+ // Place the BLOCK for MBB if MBB is branched to from above.
+ for (auto &MBB : MF)
+ placeBlockMarker(MBB);
+}
+void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
// Now rewrite references to basic blocks to be depth immediates.
+ // We need two stacks: one for normal scopes and the other for EH pad scopes.
+ // EH pad stack is used to rewrite depths in rethrow instructions.
SmallVector<const MachineBasicBlock *, 8> Stack;
+ SmallVector<const MachineBasicBlock *, 8> EHPadStack;
for (auto &MBB : reverse(MF)) {
- for (auto &MI : reverse(MBB)) {
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
+ MachineInstr &MI = *I;
switch (MI.getOpcode()) {
case WebAssembly::BLOCK:
- assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <= MBB.getNumber() &&
- "Block should be balanced");
+ assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
+ MBB.getNumber() &&
+ "Block/try should be balanced");
Stack.pop_back();
break;
+
+ case WebAssembly::TRY:
+ assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
+ MBB.getNumber() &&
+ "Block/try marker should be balanced");
+ Stack.pop_back();
+ EHPadStack.pop_back();
+ break;
+
+ case WebAssembly::CATCH_I32:
+ case WebAssembly::CATCH_I64:
+ case WebAssembly::CATCH_ALL:
+ // Currently the only case in which there is more than one catch for a try
+ // is the catch terminate pad, in the form of
+ // try
+ // catch
+ // call @__clang_call_terminate
+ // unreachable
+ // catch_all
+ // call @std::terminate
+ // unreachable
+ // end
+ // So we shouldn't push the current BB for the second catch_all block
+ // here.
+ if (!WebAssembly::isCatchAllTerminatePad(MBB))
+ EHPadStack.push_back(&MBB);
+ break;
+
case WebAssembly::LOOP:
assert(Stack.back() == &MBB && "Loop top should be balanced");
Stack.pop_back();
break;
+
case WebAssembly::END_BLOCK:
+ case WebAssembly::END_TRY:
Stack.push_back(&MBB);
break;
+
case WebAssembly::END_LOOP:
- Stack.push_back(LoopTops[&MI]->getParent());
+ Stack.push_back(EndToBegin[&MI]->getParent());
+ break;
+
+ case WebAssembly::RETHROW: {
+ // Rewrite MBB operands to be depth immediates.
+ unsigned EHPadDepth = GetDepth(EHPadStack, MI.getOperand(0).getMBB());
+ MI.RemoveOperand(0);
+ MI.addOperand(MF, MachineOperand::CreateImm(EHPadDepth));
break;
+ }
+
+ case WebAssembly::RETHROW_TO_CALLER: {
+ MachineInstr *Rethrow =
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(WebAssembly::RETHROW))
+ .addImm(EHPadStack.size());
+ MI.eraseFromParent();
+ I = MachineBasicBlock::reverse_iterator(Rethrow);
+ break;
+ }
+
default:
if (MI.isTerminator()) {
// Rewrite MBB operands to be depth immediates.
@@ -352,13 +785,15 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
}
}
assert(Stack.empty() && "Control flow should be balanced");
+}
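
GetDepth, whose declaration appears above, computes the relative depth immediate that wasm branches and rethrows use: the number of enclosing scopes between the current point and the scope ending at the target block, counted from the innermost scope outward. A standalone sketch of that computation (vector/int model in place of the LLVM SmallVector of MachineBasicBlock pointers):

#include <cassert>
#include <vector>

// Depth of Target measured from the top of the scope stack: 0 for the
// innermost enclosing scope, 1 for the next one out, and so on.
static unsigned getDepthSketch(const std::vector<int> &Stack, int Target) {
  unsigned Depth = 0;
  for (auto I = Stack.rbegin(), E = Stack.rend(); I != E; ++I, ++Depth)
    if (*I == Target)
      return Depth;
  assert(false && "branch destination should be in scope");
  return 0;
}

// Example: with scopes pushed in the order {10, 20, 30}, a branch whose
// destination closes scope 30 gets depth 0, and one targeting 20 gets depth 1.
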
- // Fix up block/loop signatures at the end of the function to conform to
- // WebAssembly's rules.
- FixEndsAtEndOfFunction(MF, MFI, BlockTops, LoopTops);
-
- // Add an end instruction at the end of the function body.
- AppendEndToFunction(MF, TII);
+void WebAssemblyCFGStackify::releaseMemory() {
+ ScopeTops.clear();
+ BeginToEnd.clear();
+ EndToBegin.clear();
+ TryToEHPad.clear();
+ EHPadToTry.clear();
+ BeginToBottom.clear();
}
bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
@@ -366,15 +801,27 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
"********** Function: "
<< MF.getName() << '\n');
- const auto &MLI = getAnalysis<MachineLoopInfo>();
- auto &MDT = getAnalysis<MachineDominatorTree>();
+ releaseMemory();
+
// Liveness is not tracked for VALUE_STACK physreg.
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
MF.getRegInfo().invalidateLiveness();
- // Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
- PlaceMarkers(MF, MLI, TII, MDT, MFI);
+ // Place the BLOCK/LOOP/TRY markers to indicate the beginnings of scopes.
+ placeMarkers(MF);
+
+ // Convert MBB operands in terminators to relative depth immediates.
+ rewriteDepthImmediates(MF);
+
+ // Fix up block/loop/try signatures at the end of the function to conform to
+ // WebAssembly's rules.
+ fixEndsAtEndOfFunction(MF);
+
+ // Add an end instruction at the end of the function body.
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ if (!MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple()
+ .isOSBinFormatELF())
+ AppendEndToFunction(MF, TII);
return true;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
index c1820bf66bc0..aaa6d286598f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -64,16 +64,30 @@ FunctionPass *llvm::createWebAssemblyCallIndirectFixup() {
static unsigned GetNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
switch (MI.getOpcode()) {
using namespace WebAssembly;
- case PCALL_INDIRECT_VOID: return CALL_INDIRECT_VOID;
- case PCALL_INDIRECT_I32: return CALL_INDIRECT_I32;
- case PCALL_INDIRECT_I64: return CALL_INDIRECT_I64;
- case PCALL_INDIRECT_F32: return CALL_INDIRECT_F32;
- case PCALL_INDIRECT_F64: return CALL_INDIRECT_F64;
- case PCALL_INDIRECT_v16i8: return CALL_INDIRECT_v16i8;
- case PCALL_INDIRECT_v8i16: return CALL_INDIRECT_v8i16;
- case PCALL_INDIRECT_v4i32: return CALL_INDIRECT_v4i32;
- case PCALL_INDIRECT_v4f32: return CALL_INDIRECT_v4f32;
- default: return INSTRUCTION_LIST_END;
+ case PCALL_INDIRECT_VOID:
+ return CALL_INDIRECT_VOID;
+ case PCALL_INDIRECT_I32:
+ return CALL_INDIRECT_I32;
+ case PCALL_INDIRECT_I64:
+ return CALL_INDIRECT_I64;
+ case PCALL_INDIRECT_F32:
+ return CALL_INDIRECT_F32;
+ case PCALL_INDIRECT_F64:
+ return CALL_INDIRECT_F64;
+ case PCALL_INDIRECT_v16i8:
+ return CALL_INDIRECT_v16i8;
+ case PCALL_INDIRECT_v8i16:
+ return CALL_INDIRECT_v8i16;
+ case PCALL_INDIRECT_v4i32:
+ return CALL_INDIRECT_v4i32;
+ case PCALL_INDIRECT_v2i64:
+ return CALL_INDIRECT_v2i64;
+ case PCALL_INDIRECT_v4f32:
+ return CALL_INDIRECT_v4f32;
+ case PCALL_INDIRECT_v2f64:
+ return CALL_INDIRECT_v2f64;
+ default:
+ return INSTRUCTION_LIST_END;
}
}
@@ -84,7 +98,7 @@ static bool IsPseudoCallIndirect(const MachineInstr &MI) {
bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Fixing up CALL_INDIRECTs **********\n"
- << MF.getName() << '\n');
+ << "********** Function: " << MF.getName() << '\n');
bool Changed = false;
const WebAssemblyInstrInfo *TII =
@@ -110,10 +124,8 @@ bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
Ops.push_back(MachineOperand::CreateImm(0));
for (const MachineOperand &MO :
- make_range(MI.operands_begin() +
- MI.getDesc().getNumDefs() + 1,
- MI.operands_begin() +
- MI.getNumExplicitOperands()))
+ make_range(MI.operands_begin() + MI.getDesc().getNumDefs() + 1,
+ MI.operands_begin() + MI.getNumExplicitOperands()))
Ops.push_back(MO);
Ops.push_back(MI.getOperand(MI.getDesc().getNumDefs()));
@@ -133,4 +145,3 @@ bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
new file mode 100644
index 000000000000..8ecc159951ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
@@ -0,0 +1,46 @@
+//===-- WebAssemblyDebugValueManager.cpp - WebAssembly DebugValue Manager -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the manager for MachineInstr DebugValues.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyDebugValueManager.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+using namespace llvm;
+
+WebAssemblyDebugValueManager::WebAssemblyDebugValueManager(
+ MachineInstr *Instr) {
+ Instr->collectDebugValues(DbgValues);
+}
+
+void WebAssemblyDebugValueManager::move(MachineInstr *Insert) {
+ MachineBasicBlock *MBB = Insert->getParent();
+ for (MachineInstr *DBI : reverse(DbgValues))
+ MBB->splice(Insert, DBI->getParent(), DBI);
+}
+
+void WebAssemblyDebugValueManager::updateReg(unsigned Reg) {
+ for (auto *DBI : DbgValues)
+ DBI->getOperand(0).setReg(Reg);
+}
+
+void WebAssemblyDebugValueManager::clone(MachineInstr *Insert,
+ unsigned NewReg) {
+ MachineBasicBlock *MBB = Insert->getParent();
+ MachineFunction *MF = MBB->getParent();
+ for (MachineInstr *DBI : reverse(DbgValues)) {
+ MachineInstr *Clone = MF->CloneMachineInstr(DBI);
+ Clone->getOperand(0).setReg(NewReg);
+ MBB->insert(Insert, Clone);
+ }
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
new file mode 100644
index 000000000000..73f317214058
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
@@ -0,0 +1,38 @@
+// WebAssemblyDebugValueManager.h - WebAssembly DebugValue Manager -*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the WebAssembly-specific
+/// manager for DebugValues associated with the specific MachineInstr.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class MachineInstr;
+
+class WebAssemblyDebugValueManager {
+ SmallVector<MachineInstr *, 2> DbgValues;
+
+public:
+ WebAssemblyDebugValueManager(MachineInstr *Instr);
+
+ void move(MachineInstr *Insert);
+ void updateReg(unsigned Reg);
+ void clone(MachineInstr *Insert, unsigned NewReg);
+};
+
+} // end namespace llvm
+
+#endif
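
A hypothetical call site (not part of this patch) showing how the three operations above are meant to be combined when an optimization relocates a def: the DBG_VALUEs collected at construction time either travel with the instruction or get retargeted to a replacement register.

#include "WebAssemblyDebugValueManager.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Move Def (and the DBG_VALUEs describing it) to just before InsertBefore.
static void moveDefWithDebugValues(MachineBasicBlock &MBB, MachineInstr *Def,
                                   MachineInstr *InsertBefore) {
  WebAssemblyDebugValueManager DVM(Def); // snapshot Def's DBG_VALUE users
  MBB.splice(InsertBefore->getIterator(), &MBB, Def->getIterator());
  DVM.move(InsertBefore);                // keep the DBG_VALUEs next to Def
}
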
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
new file mode 100644
index 000000000000..c86260ba408c
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
@@ -0,0 +1,87 @@
+//===-- WebAssemblyEHRestoreStackPointer.cpp - __stack_pointer restoration ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// After the stack is unwound due to a thrown exception, the __stack_pointer
+/// global can point to an invalid address. This inserts instructions that
+/// restore the __stack_pointer global.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-eh-restore-stack-pointer"
+
+namespace {
+class WebAssemblyEHRestoreStackPointer final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyEHRestoreStackPointer() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "WebAssembly Restore Stack Pointer for Exception Handling";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+char WebAssemblyEHRestoreStackPointer::ID = 0;
+INITIALIZE_PASS(WebAssemblyEHRestoreStackPointer, DEBUG_TYPE,
+ "Restore Stack Pointer for Exception Handling", true, false)
+
+FunctionPass *llvm::createWebAssemblyEHRestoreStackPointer() {
+ return new WebAssemblyEHRestoreStackPointer();
+}
+
+bool WebAssemblyEHRestoreStackPointer::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** EH Restore Stack Pointer **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
+ if (!FrameLowering->needsPrologForEH(MF))
+ return false;
+ bool Changed = false;
+
+ for (auto &MBB : MF) {
+ if (!MBB.isEHPad())
+ continue;
+ Changed = true;
+
+ // Insert __stack_pointer restoring instructions at the beginning of each EH
+ // pad, after the catch instruction. (Catch instructions may have been
+ // reordered, and catch_all instructions have not been inserted yet, but
+ // those cases are handled in LateEHPrepare).
+ //
+ // Here it is safe to assume that SP32 holds the latest value of
+ // __stack_pointer, because the only exception for this case is when a
+ // function uses the red zone, but that only happens with leaf functions,
+ // and we don't restore __stack_pointer in leaf functions anyway.
+ auto InsertPos = MBB.begin();
+ if (WebAssembly::isCatch(*MBB.begin()))
+ InsertPos++;
+ FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos,
+ MBB.begin()->getDebugLoc());
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
index 84683d48a90a..6b3a3e765786 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyExceptionInfo.h"
-#include "WebAssemblyUtilities.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -32,7 +32,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
INITIALIZE_PASS_END(WebAssemblyExceptionInfo, DEBUG_TYPE,
"WebAssembly Exception Information", true, true)
-bool WebAssemblyExceptionInfo::runOnMachineFunction(MachineFunction &F) {
+bool WebAssemblyExceptionInfo::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** Exception Info Calculation **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
releaseMemory();
auto &MDT = getAnalysis<MachineDominatorTree>();
auto &MDF = getAnalysis<MachineDominanceFrontier>();
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 8619cbdcb5ee..27aabe6ba0bd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -11,7 +11,7 @@
/// This file converts any remaining registers into WebAssembly locals.
///
/// After register stackification and register coloring, convert non-stackified
-/// registers into locals, inserting explicit get_local and set_local
+/// registers into locals, inserting explicit local.get and local.set
/// instructions.
///
//===----------------------------------------------------------------------===//
@@ -31,12 +31,14 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-explicit-locals"
-// A command-line option to disable this pass. Note that this produces output
-// which is not valid WebAssembly, though it may be more convenient for writing
-// LLVM unit tests with.
-static cl::opt<bool> DisableWebAssemblyExplicitLocals(
- "disable-wasm-explicit-locals", cl::ReallyHidden,
- cl::desc("WebAssembly: Disable emission of get_local/set_local."),
+// A command-line option to disable this pass, and keep implicit locals
+// for the purpose of testing with lit/llc ONLY.
+// This produces output which is not valid WebAssembly, and is not supported
+// by assemblers/disassemblers and other MC based tools.
+static cl::opt<bool> WasmDisableExplicitLocals(
+ "wasm-disable-explicit-locals", cl::Hidden,
+ cl::desc("WebAssembly: output implicit locals in"
+ " instruction output for test purposes only."),
cl::init(false));
namespace {
@@ -94,54 +96,54 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) {
llvm_unreachable("Unexpected register class");
}
-/// Get the appropriate get_local opcode for the given register class.
+/// Get the appropriate local.get opcode for the given register class.
static unsigned getGetLocalOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::GET_LOCAL_I32;
+ return WebAssembly::LOCAL_GET_I32;
if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::GET_LOCAL_I64;
+ return WebAssembly::LOCAL_GET_I64;
if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::GET_LOCAL_F32;
+ return WebAssembly::LOCAL_GET_F32;
if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::GET_LOCAL_F64;
+ return WebAssembly::LOCAL_GET_F64;
if (RC == &WebAssembly::V128RegClass)
- return WebAssembly::GET_LOCAL_V128;
+ return WebAssembly::LOCAL_GET_V128;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::GET_LOCAL_EXCEPT_REF;
+ return WebAssembly::LOCAL_GET_EXCEPT_REF;
llvm_unreachable("Unexpected register class");
}
-/// Get the appropriate set_local opcode for the given register class.
+/// Get the appropriate local.set opcode for the given register class.
static unsigned getSetLocalOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::SET_LOCAL_I32;
+ return WebAssembly::LOCAL_SET_I32;
if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::SET_LOCAL_I64;
+ return WebAssembly::LOCAL_SET_I64;
if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::SET_LOCAL_F32;
+ return WebAssembly::LOCAL_SET_F32;
if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::SET_LOCAL_F64;
+ return WebAssembly::LOCAL_SET_F64;
if (RC == &WebAssembly::V128RegClass)
- return WebAssembly::SET_LOCAL_V128;
+ return WebAssembly::LOCAL_SET_V128;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::SET_LOCAL_EXCEPT_REF;
+ return WebAssembly::LOCAL_SET_EXCEPT_REF;
llvm_unreachable("Unexpected register class");
}
-/// Get the appropriate tee_local opcode for the given register class.
+/// Get the appropriate local.tee opcode for the given register class.
static unsigned getTeeLocalOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::TEE_LOCAL_I32;
+ return WebAssembly::LOCAL_TEE_I32;
if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::TEE_LOCAL_I64;
+ return WebAssembly::LOCAL_TEE_I64;
if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::TEE_LOCAL_F32;
+ return WebAssembly::LOCAL_TEE_F32;
if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::TEE_LOCAL_F64;
+ return WebAssembly::LOCAL_TEE_F64;
if (RC == &WebAssembly::V128RegClass)
- return WebAssembly::TEE_LOCAL_V128;
+ return WebAssembly::LOCAL_TEE_V128;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::TEE_LOCAL_EXCEPT_REF;
+ return WebAssembly::LOCAL_TEE_EXCEPT_REF;
llvm_unreachable("Unexpected register class");
}
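
The conversion below also leans on a getLocalId helper that is not visible in these hunks; it maps a virtual register to a wasm local index, allocating the next free index on first use (indices start right after the function parameters, as the CurLocal initialization further down shows). A standalone sketch under that assumption, with std::unordered_map standing in for llvm::DenseMap:

#include <unordered_map>

static unsigned getLocalIdSketch(std::unordered_map<unsigned, unsigned> &Reg2Local,
                                 unsigned &CurLocal, unsigned Reg) {
  auto It = Reg2Local.find(Reg);
  if (It != Reg2Local.end())
    return It->second;          // register already has a local assigned
  Reg2Local[Reg] = CurLocal;    // otherwise hand out the next free index
  return CurLocal++;
}
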
@@ -155,6 +157,8 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
return MVT::f32;
if (RC == &WebAssembly::F64RegClass)
return MVT::f64;
+ if (RC == &WebAssembly::V128RegClass)
+ return MVT::v16i8;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
return MVT::ExceptRef;
llvm_unreachable("unrecognized register class");
@@ -162,7 +166,7 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
/// Given a MachineOperand of a stackified vreg, return the instruction at the
/// start of the expression tree.
-static MachineInstr *FindStartOfTree(MachineOperand &MO,
+static MachineInstr *findStartOfTree(MachineOperand &MO,
MachineRegisterInfo &MRI,
WebAssemblyFunctionInfo &MFI) {
unsigned Reg = MO.getReg();
@@ -173,7 +177,7 @@ static MachineInstr *FindStartOfTree(MachineOperand &MO,
for (MachineOperand &DefMO : Def->explicit_uses()) {
if (!DefMO.isReg())
continue;
- return FindStartOfTree(DefMO, MRI, MFI);
+ return findStartOfTree(DefMO, MRI, MFI);
}
// If there were no stackified uses, we've reached the start.
@@ -186,7 +190,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
<< MF.getName() << '\n');
// Disable this pass if directed to do so.
- if (DisableWebAssemblyExplicitLocals)
+ if (WasmDisableExplicitLocals)
return false;
bool Changed = false;
@@ -206,19 +210,19 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
break;
unsigned Reg = MI.getOperand(0).getReg();
assert(!MFI.isVRegStackified(Reg));
- Reg2Local[Reg] = MI.getOperand(1).getImm();
+ Reg2Local[Reg] = static_cast<unsigned>(MI.getOperand(1).getImm());
MI.eraseFromParent();
Changed = true;
}
// Start assigning local numbers after the last parameter.
- unsigned CurLocal = MFI.getParams().size();
+ unsigned CurLocal = static_cast<unsigned>(MFI.getParams().size());
// Precompute the set of registers that are unused, so that we can insert
// drops to their defs.
BitVector UseEmpty(MRI.getNumVirtRegs());
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i)
- UseEmpty[i] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(i));
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I)
+ UseEmpty[I] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(I));
// Visit each instruction in the function.
for (MachineBasicBlock &MBB : MF) {
@@ -229,8 +233,8 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (MI.isDebugInstr() || MI.isLabel())
continue;
- // Replace tee instructions with tee_local. The difference is that tee
- // instructins have two defs, while tee_local instructions have one def
+ // Replace tee instructions with local.tee. The difference is that tee
+ // instructions have two defs, while local.tee instructions have one def
// and an index of a local to write to.
if (WebAssembly::isTee(MI)) {
assert(MFI.isVRegStackified(MI.getOperand(0).getReg()));
@@ -249,7 +253,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
MFI.stackifyVReg(NewReg);
}
- // Replace the TEE with a TEE_LOCAL.
+ // Replace the TEE with a LOCAL_TEE.
unsigned LocalId =
getLocalId(Reg2Local, CurLocal, MI.getOperand(1).getReg());
unsigned Opc = getTeeLocalOpcode(RC);
@@ -263,7 +267,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- // Insert set_locals for any defs that aren't stackified yet. Currently
+ // Insert local.sets for any defs that aren't stackified yet. Currently
// we handle at most one def.
assert(MI.getDesc().getNumDefs() <= 1);
if (MI.getDesc().getNumDefs() == 1) {
@@ -292,15 +296,16 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
.addReg(NewReg);
}
MI.getOperand(0).setReg(NewReg);
- // This register operand is now being used by the inserted drop
- // instruction, so make it undead.
+ // This register operand of the original instruction is now being used
+ // by the inserted drop or local.set instruction, so make it not dead
+ // yet.
MI.getOperand(0).setIsDead(false);
MFI.stackifyVReg(NewReg);
Changed = true;
}
}
- // Insert get_locals for any uses that aren't stackified yet.
+ // Insert local.gets for any uses that aren't stackified yet.
MachineInstr *InsertPt = &MI;
for (MachineOperand &MO : reverse(MI.explicit_uses())) {
if (!MO.isReg())
@@ -314,15 +319,17 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (MO.isDef()) {
assert(MI.getOpcode() == TargetOpcode::INLINEASM);
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
- MRI.removeRegOperandFromUseList(&MO);
- MO = MachineOperand::CreateImm(LocalId);
+ // If this register operand is tied to another operand, we can't
+ // change it to an immediate. Untie it first.
+ MI.untieRegOperand(MI.getOperandNo(&MO));
+ MO.ChangeToImmediate(LocalId);
continue;
}
// If we see a stackified register, prepare to insert subsequent
- // get_locals before the start of its tree.
+ // local.gets before the start of its tree.
if (MFI.isVRegStackified(OldReg)) {
- InsertPt = FindStartOfTree(MO, MRI, MFI);
+ InsertPt = findStartOfTree(MO, MRI, MFI);
continue;
}
@@ -330,12 +337,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// indices as immediates.
if (MI.getOpcode() == TargetOpcode::INLINEASM) {
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
- MRI.removeRegOperandFromUseList(&MO);
- MO = MachineOperand::CreateImm(LocalId);
+ // Untie it first if this reg operand is tied to another operand.
+ MI.untieRegOperand(MI.getOperandNo(&MO));
+ MO.ChangeToImmediate(LocalId);
continue;
}
- // Insert a get_local.
+ // Insert a local.get.
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
unsigned NewReg = MRI.createVirtualRegister(RC);
@@ -361,13 +369,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Define the locals.
// TODO: Sort the locals for better compression.
MFI.setNumLocals(CurLocal - MFI.getParams().size());
- for (size_t i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- auto I = Reg2Local.find(Reg);
- if (I == Reg2Local.end() || I->second < MFI.getParams().size())
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ auto RL = Reg2Local.find(Reg);
+ if (RL == Reg2Local.end() || RL->second < MFI.getParams().size())
continue;
- MFI.setLocal(I->second - MFI.getParams().size(),
+ MFI.setLocal(RL->second - MFI.getParams().size(),
typeForRegClass(MRI.getRegClass(Reg)));
Changed = true;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 566ef68c027d..3856700cca94 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -37,7 +37,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "wasm-fastisel"
@@ -114,8 +117,8 @@ private:
// Utility helper routines
MVT::SimpleValueType getSimpleType(Type *Ty) {
EVT VT = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
- return VT.isSimple() ? VT.getSimpleVT().SimpleTy :
- MVT::INVALID_SIMPLE_VALUE_TYPE;
+ return VT.isSimple() ? VT.getSimpleVT().SimpleTy
+ : MVT::INVALID_SIMPLE_VALUE_TYPE;
}
MVT::SimpleValueType getLegalType(MVT::SimpleValueType VT) {
switch (VT) {
@@ -138,6 +141,11 @@ private:
if (Subtarget->hasSIMD128())
return VT;
break;
+ case MVT::v2i64:
+ case MVT::v2f64:
+ if (Subtarget->hasUnimplementedSIMD128())
+ return VT;
+ break;
default:
break;
}
@@ -153,11 +161,9 @@ private:
MVT::SimpleValueType From);
unsigned signExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From);
- unsigned zeroExtend(unsigned Reg, const Value *V,
- MVT::SimpleValueType From,
+ unsigned zeroExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From,
MVT::SimpleValueType To);
- unsigned signExtend(unsigned Reg, const Value *V,
- MVT::SimpleValueType From,
+ unsigned signExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From,
MVT::SimpleValueType To);
unsigned getRegForUnsignedValue(const Value *V);
unsigned getRegForSignedValue(const Value *V);
@@ -374,14 +380,12 @@ void WebAssemblyFastISel::materializeLoadStoreOperands(Address &Addr) {
if (Addr.isRegBase()) {
unsigned Reg = Addr.getReg();
if (Reg == 0) {
- Reg = createResultReg(Subtarget->hasAddr64() ?
- &WebAssembly::I64RegClass :
- &WebAssembly::I32RegClass);
- unsigned Opc = Subtarget->hasAddr64() ?
- WebAssembly::CONST_I64 :
- WebAssembly::CONST_I32;
+ Reg = createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
+ : &WebAssembly::I32RegClass);
+ unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
+ : WebAssembly::CONST_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), Reg)
- .addImm(0);
+ .addImm(0);
Addr.setReg(Reg);
}
}
@@ -419,9 +423,10 @@ unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
return getRegForValue(ICmp->getOperand(0));
}
- if (BinaryOperator::isNot(V) && V->getType()->isIntegerTy(32)) {
+ Value *NotV;
+ if (match(V, m_Not(m_Value(NotV))) && V->getType()->isIntegerTy(32)) {
Not = true;
- return getRegForValue(BinaryOperator::getNotArgument(V));
+ return getRegForValue(NotV);
}
Not = false;
@@ -438,13 +443,12 @@ unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
switch (From) {
case MVT::i1:
- // If the value is naturally an i1, we don't need to mask it.
- // TODO: Recursively examine selects, phis, and, or, xor, constants.
- if (From == MVT::i1 && V != nullptr) {
- if (isa<CmpInst>(V) ||
- (isa<Argument>(V) && cast<Argument>(V)->hasZExtAttr()))
- return copyValue(Reg);
- }
+ // If the value is naturally an i1, we don't need to mask it. We only know
+ // if a value is naturally an i1 if it is definitely lowered by FastISel,
+ // not a DAG ISel fallback.
+ if (V != nullptr && isa<Argument>(V) && cast<Argument>(V)->hasZExtAttr())
+ return copyValue(Reg);
+ break;
case MVT::i8:
case MVT::i16:
break;
@@ -457,13 +461,13 @@ unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
- .addImm(~(~uint64_t(0) << MVT(From).getSizeInBits()));
+ .addImm(~(~uint64_t(0) << MVT(From).getSizeInBits()));
unsigned Result = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::AND_I32), Result)
- .addReg(Reg)
- .addReg(Imm);
+ .addReg(Reg)
+ .addReg(Imm);
return Result;
}
@@ -487,19 +491,19 @@ unsigned WebAssemblyFastISel::signExtendToI32(unsigned Reg, const Value *V,
unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
- .addImm(32 - MVT(From).getSizeInBits());
+ .addImm(32 - MVT(From).getSizeInBits());
unsigned Left = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHL_I32), Left)
- .addReg(Reg)
- .addReg(Imm);
+ .addReg(Reg)
+ .addReg(Imm);
unsigned Right = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHR_S_I32), Right)
- .addReg(Left)
- .addReg(Imm);
+ .addReg(Left)
+ .addReg(Imm);
return Right;
}
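
The two helpers above emit a mask-and-AND sequence and a shift pair; as scalar arithmetic they are equivalent to the following (illustrative functions for the i8-in-i32 case, not code from this patch):

#include <cstdint>

static uint32_t zeroExtend8To32(uint32_t X) {
  return X & ~(~uint64_t(0) << 8);      // CONST mask (0xFF here), then AND
}

static int32_t signExtend8To32(uint32_t X) {
  // SHL by 32 - 8, then arithmetic SHR by the same amount; the conversion and
  // right shift behave as two's complement on the compilers of interest here.
  return int32_t(X << (32 - 8)) >> (32 - 8);
}
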
@@ -562,8 +566,7 @@ unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) {
unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V,
bool IsSigned) {
- return IsSigned ? getRegForSignedValue(V) :
- getRegForUnsignedValue(V);
+ return IsSigned ? getRegForSignedValue(V) : getRegForUnsignedValue(V);
}
unsigned WebAssemblyFastISel::notValue(unsigned Reg) {
@@ -572,15 +575,15 @@ unsigned WebAssemblyFastISel::notValue(unsigned Reg) {
unsigned NotReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::EQZ_I32), NotReg)
- .addReg(Reg);
+ .addReg(Reg);
return NotReg;
}
unsigned WebAssemblyFastISel::copyValue(unsigned Reg) {
unsigned ResultReg = createResultReg(MRI.getRegClass(Reg));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(WebAssembly::COPY), ResultReg)
- .addReg(Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(WebAssembly::COPY),
+ ResultReg)
+ .addReg(Reg);
return ResultReg;
}
@@ -589,12 +592,11 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ?
- &WebAssembly::I64RegClass :
- &WebAssembly::I32RegClass);
- unsigned Opc = Subtarget->hasAddr64() ?
- WebAssembly::COPY_I64 :
- WebAssembly::COPY_I32;
+ unsigned ResultReg =
+ createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
+ : &WebAssembly::I32RegClass);
+ unsigned Opc =
+ Subtarget->hasAddr64() ? WebAssembly::COPY_I64 : WebAssembly::COPY_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addFrameIndex(SI->second);
return ResultReg;
@@ -605,14 +607,13 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
- unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ?
- &WebAssembly::I64RegClass :
- &WebAssembly::I32RegClass);
- unsigned Opc = Subtarget->hasAddr64() ?
- WebAssembly::CONST_I64 :
- WebAssembly::CONST_I32;
+ unsigned ResultReg =
+ createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
+ : &WebAssembly::I32RegClass);
+ unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
+ : WebAssembly::CONST_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addGlobalAddress(GV);
+ .addGlobalAddress(GV);
return ResultReg;
}
@@ -651,19 +652,19 @@ bool WebAssemblyFastISel::fastLowerArguments() {
case MVT::i8:
case MVT::i16:
case MVT::i32:
- Opc = WebAssembly::ARGUMENT_I32;
+ Opc = WebAssembly::ARGUMENT_i32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
- Opc = WebAssembly::ARGUMENT_I64;
+ Opc = WebAssembly::ARGUMENT_i64;
RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
- Opc = WebAssembly::ARGUMENT_F32;
+ Opc = WebAssembly::ARGUMENT_f32;
RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
- Opc = WebAssembly::ARGUMENT_F64;
+ Opc = WebAssembly::ARGUMENT_f64;
RC = &WebAssembly::F64RegClass;
break;
case MVT::v16i8:
@@ -678,12 +679,20 @@ bool WebAssemblyFastISel::fastLowerArguments() {
Opc = WebAssembly::ARGUMENT_v4i32;
RC = &WebAssembly::V128RegClass;
break;
+ case MVT::v2i64:
+ Opc = WebAssembly::ARGUMENT_v2i64;
+ RC = &WebAssembly::V128RegClass;
+ break;
case MVT::v4f32:
Opc = WebAssembly::ARGUMENT_v4f32;
RC = &WebAssembly::V128RegClass;
break;
+ case MVT::v2f64:
+ Opc = WebAssembly::ARGUMENT_v2f64;
+ RC = &WebAssembly::V128RegClass;
+ break;
case MVT::ExceptRef:
- Opc = WebAssembly::ARGUMENT_EXCEPT_REF;
+ Opc = WebAssembly::ARGUMENT_ExceptRef;
RC = &WebAssembly::EXCEPT_REFRegClass;
break;
default:
@@ -691,7 +700,7 @@ bool WebAssemblyFastISel::fastLowerArguments() {
}
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addImm(i);
+ .addImm(i);
updateValueMap(&Arg, ResultReg);
++i;
@@ -710,7 +719,8 @@ bool WebAssemblyFastISel::fastLowerArguments() {
}
if (!F->getReturnType()->isVoidTy()) {
- MVT::SimpleValueType RetTy = getLegalType(getSimpleType(F->getReturnType()));
+ MVT::SimpleValueType RetTy =
+ getLegalType(getSimpleType(F->getReturnType()));
if (RetTy == MVT::INVALID_SIMPLE_VALUE_TYPE) {
MFI->clearParamsAndResults();
return false;
@@ -768,23 +778,33 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
ResultReg = createResultReg(&WebAssembly::F64RegClass);
break;
case MVT::v16i8:
- Opc =
- IsDirect ? WebAssembly::CALL_v16i8 : WebAssembly::PCALL_INDIRECT_v16i8;
+ Opc = IsDirect ? WebAssembly::CALL_v16i8
+ : WebAssembly::PCALL_INDIRECT_v16i8;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v8i16:
- Opc =
- IsDirect ? WebAssembly::CALL_v8i16 : WebAssembly::PCALL_INDIRECT_v8i16;
+ Opc = IsDirect ? WebAssembly::CALL_v8i16
+ : WebAssembly::PCALL_INDIRECT_v8i16;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4i32:
- Opc =
- IsDirect ? WebAssembly::CALL_v4i32 : WebAssembly::PCALL_INDIRECT_v4i32;
+ Opc = IsDirect ? WebAssembly::CALL_v4i32
+ : WebAssembly::PCALL_INDIRECT_v4i32;
+ ResultReg = createResultReg(&WebAssembly::V128RegClass);
+ break;
+ case MVT::v2i64:
+ Opc = IsDirect ? WebAssembly::CALL_v2i64
+ : WebAssembly::PCALL_INDIRECT_v2i64;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4f32:
- Opc =
- IsDirect ? WebAssembly::CALL_v4f32 : WebAssembly::PCALL_INDIRECT_v4f32;
+ Opc = IsDirect ? WebAssembly::CALL_v4f32
+ : WebAssembly::PCALL_INDIRECT_v4f32;
+ ResultReg = createResultReg(&WebAssembly::V128RegClass);
+ break;
+ case MVT::v2f64:
+ Opc = IsDirect ? WebAssembly::CALL_v2f64
+ : WebAssembly::PCALL_INDIRECT_v2f64;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::ExceptRef:
@@ -853,11 +873,11 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
const SelectInst *Select = cast<SelectInst>(I);
bool Not;
- unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
+ unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
if (CondReg == 0)
return false;
- unsigned TrueReg = getRegForValue(Select->getTrueValue());
+ unsigned TrueReg = getRegForValue(Select->getTrueValue());
if (TrueReg == 0)
return false;
@@ -900,9 +920,9 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(TrueReg)
- .addReg(FalseReg)
- .addReg(CondReg);
+ .addReg(TrueReg)
+ .addReg(FalseReg)
+ .addReg(CondReg);
updateValueMap(Select, ResultReg);
return true;
@@ -1002,7 +1022,8 @@ bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
Opc = I32 ? WebAssembly::LE_S_I32 : WebAssembly::LE_S_I64;
isSigned = true;
break;
- default: return false;
+ default:
+ return false;
}
unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), isSigned);
@@ -1210,7 +1231,8 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
case MVT::f64:
Opc = WebAssembly::STORE_F64;
break;
- default: return false;
+ default:
+ return false;
}
materializeLoadStoreOperands(Addr);
@@ -1275,8 +1297,10 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
unsigned Opc;
switch (getSimpleType(RV->getType())) {
- case MVT::i1: case MVT::i8:
- case MVT::i16: case MVT::i32:
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
Opc = WebAssembly::RETURN_I32;
break;
case MVT::i64:
@@ -1297,13 +1321,20 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
case MVT::v4i32:
Opc = WebAssembly::RETURN_v4i32;
break;
+ case MVT::v2i64:
+ Opc = WebAssembly::RETURN_v2i64;
+ break;
case MVT::v4f32:
Opc = WebAssembly::RETURN_v4f32;
break;
+ case MVT::v2f64:
+ Opc = WebAssembly::RETURN_v2f64;
+ break;
case MVT::ExceptRef:
Opc = WebAssembly::RETURN_EXCEPT_REF;
break;
- default: return false;
+ default:
+ return false;
}
unsigned Reg;
@@ -1333,19 +1364,32 @@ bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) {
if (selectCall(I))
return true;
break;
- case Instruction::Select: return selectSelect(I);
- case Instruction::Trunc: return selectTrunc(I);
- case Instruction::ZExt: return selectZExt(I);
- case Instruction::SExt: return selectSExt(I);
- case Instruction::ICmp: return selectICmp(I);
- case Instruction::FCmp: return selectFCmp(I);
- case Instruction::BitCast: return selectBitCast(I);
- case Instruction::Load: return selectLoad(I);
- case Instruction::Store: return selectStore(I);
- case Instruction::Br: return selectBr(I);
- case Instruction::Ret: return selectRet(I);
- case Instruction::Unreachable: return selectUnreachable(I);
- default: break;
+ case Instruction::Select:
+ return selectSelect(I);
+ case Instruction::Trunc:
+ return selectTrunc(I);
+ case Instruction::ZExt:
+ return selectZExt(I);
+ case Instruction::SExt:
+ return selectSExt(I);
+ case Instruction::ICmp:
+ return selectICmp(I);
+ case Instruction::FCmp:
+ return selectFCmp(I);
+ case Instruction::BitCast:
+ return selectBitCast(I);
+ case Instruction::Load:
+ return selectLoad(I);
+ case Instruction::Store:
+ return selectStore(I);
+ case Instruction::Br:
+ return selectBr(I);
+ case Instruction::Ret:
+ return selectRet(I);
+ case Instruction::Unreachable:
+ return selectUnreachable(I);
+ default:
+ break;
}
// Fall back to target-independent instruction selection.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index d5e47ee82513..1a416520f97d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -36,10 +36,10 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-fix-function-bitcasts"
-static cl::opt<bool> TemporaryWorkarounds(
- "wasm-temporary-workarounds",
- cl::desc("Apply certain temporary workarounds"),
- cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ TemporaryWorkarounds("wasm-temporary-workarounds",
+ cl::desc("Apply certain temporary workarounds"),
+ cl::init(true), cl::Hidden);
namespace {
class FixFunctionBitcasts final : public ModulePass {
@@ -103,14 +103,29 @@ static void FindUses(Value *V, Function &F,
// - Return value is not needed: drop it
// - Return value needed but not present: supply an undef
//
-// For now, return nullptr without creating a wrapper if the wrapper cannot
-// be generated due to incompatible types.
+// If all the argument types are trivially castable to one another (e.g.
+// I32 vs pointer type) then we don't create a wrapper at all (return nullptr
+// instead).
+//
+// If there is a type mismatch that we know would result in an invalid wasm
+// module then generate wrapper that contains unreachable (i.e. abort at
+// runtime). Such programs are deep into undefined behaviour territory,
+// but we choose to fail at runtime rather than generate and invalid module
+// or fail at compiler time. The reason we delay the error is that we want
+// to support the CMake which expects to be able to compile and link programs
+// that refer to functions with entirely incorrect signatures (this is how
+// CMake detects the existence of a function in a toolchain).
+//
+// For bitcasts that involve struct types we don't know at this stage if they
+// would be equivalent at the wasm level and so we can't know if we need to
+// generate a wrapper.
static Function *CreateWrapper(Function *F, FunctionType *Ty) {
Module *M = F->getParent();
- Function *Wrapper =
- Function::Create(Ty, Function::PrivateLinkage, "bitcast", M);
+ Function *Wrapper = Function::Create(Ty, Function::PrivateLinkage,
+ F->getName() + "_bitcast", M);
BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
+ const DataLayout &DL = BB->getModule()->getDataLayout();
// Determine what arguments to pass.
SmallVector<Value *, 4> Args;
@@ -118,38 +133,103 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) {
Function::arg_iterator AE = Wrapper->arg_end();
FunctionType::param_iterator PI = F->getFunctionType()->param_begin();
FunctionType::param_iterator PE = F->getFunctionType()->param_end();
+ bool TypeMismatch = false;
+ bool WrapperNeeded = false;
+
+ Type *ExpectedRtnType = F->getFunctionType()->getReturnType();
+ Type *RtnType = Ty->getReturnType();
+
+ if ((F->getFunctionType()->getNumParams() != Ty->getNumParams()) ||
+ (F->getFunctionType()->isVarArg() != Ty->isVarArg()) ||
+ (ExpectedRtnType != RtnType))
+ WrapperNeeded = true;
+
for (; AI != AE && PI != PE; ++AI, ++PI) {
- if (AI->getType() != *PI) {
- Wrapper->eraseFromParent();
- return nullptr;
+ Type *ArgType = AI->getType();
+ Type *ParamType = *PI;
+
+ if (ArgType == ParamType) {
+ Args.push_back(&*AI);
+ } else {
+ if (CastInst::isBitOrNoopPointerCastable(ArgType, ParamType, DL)) {
+ Instruction *PtrCast =
+ CastInst::CreateBitOrPointerCast(AI, ParamType, "cast");
+ BB->getInstList().push_back(PtrCast);
+ Args.push_back(PtrCast);
+ } else if (ArgType->isStructTy() || ParamType->isStructTy()) {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: struct param type in bitcast: "
+ << F->getName() << "\n");
+ WrapperNeeded = false;
+ } else {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: arg type mismatch calling: "
+ << F->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Arg[" << Args.size() << "] Expected: "
+ << *ParamType << " Got: " << *ArgType << "\n");
+ TypeMismatch = true;
+ break;
+ }
}
- Args.push_back(&*AI);
}
- for (; PI != PE; ++PI)
- Args.push_back(UndefValue::get(*PI));
- if (F->isVarArg())
- for (; AI != AE; ++AI)
- Args.push_back(&*AI);
- CallInst *Call = CallInst::Create(F, Args, "", BB);
-
- // Determine what value to return.
- if (Ty->getReturnType()->isVoidTy())
- ReturnInst::Create(M->getContext(), BB);
- else if (F->getFunctionType()->getReturnType()->isVoidTy())
- ReturnInst::Create(M->getContext(), UndefValue::get(Ty->getReturnType()),
- BB);
- else if (F->getFunctionType()->getReturnType() == Ty->getReturnType())
- ReturnInst::Create(M->getContext(), Call, BB);
- else {
+ if (WrapperNeeded && !TypeMismatch) {
+ for (; PI != PE; ++PI)
+ Args.push_back(UndefValue::get(*PI));
+ if (F->isVarArg())
+ for (; AI != AE; ++AI)
+ Args.push_back(&*AI);
+
+ CallInst *Call = CallInst::Create(F, Args, "", BB);
+
+ Type *ExpectedRtnType = F->getFunctionType()->getReturnType();
+ Type *RtnType = Ty->getReturnType();
+ // Determine what value to return.
+ if (RtnType->isVoidTy()) {
+ ReturnInst::Create(M->getContext(), BB);
+ } else if (ExpectedRtnType->isVoidTy()) {
+ LLVM_DEBUG(dbgs() << "Creating dummy return: " << *RtnType << "\n");
+ ReturnInst::Create(M->getContext(), UndefValue::get(RtnType), BB);
+ } else if (RtnType == ExpectedRtnType) {
+ ReturnInst::Create(M->getContext(), Call, BB);
+ } else if (CastInst::isBitOrNoopPointerCastable(ExpectedRtnType, RtnType,
+ DL)) {
+ Instruction *Cast =
+ CastInst::CreateBitOrPointerCast(Call, RtnType, "cast");
+ BB->getInstList().push_back(Cast);
+ ReturnInst::Create(M->getContext(), Cast, BB);
+ } else if (RtnType->isStructTy() || ExpectedRtnType->isStructTy()) {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: struct return type in bitcast: "
+ << F->getName() << "\n");
+ WrapperNeeded = false;
+ } else {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: return type mismatch calling: "
+ << F->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Expected: " << *ExpectedRtnType
+ << " Got: " << *RtnType << "\n");
+ TypeMismatch = true;
+ }
+ }
+
+ if (TypeMismatch) {
+ // Create a new wrapper that simply contains `unreachable`.
+ Wrapper->eraseFromParent();
+ Wrapper = Function::Create(Ty, Function::PrivateLinkage,
+ F->getName() + "_bitcast_invalid", M);
+ BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
+ new UnreachableInst(M->getContext(), BB);
+ Wrapper->setName(F->getName() + "_bitcast_invalid");
+ } else if (!WrapperNeeded) {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: no wrapper needed: " << F->getName()
+ << "\n");
Wrapper->eraseFromParent();
return nullptr;
}
-
+ LLVM_DEBUG(dbgs() << "CreateWrapper: " << F->getName() << "\n");
return Wrapper;
}
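The decision policy above can be restated as a tiny standalone classifier. This is an illustrative C++ sketch with invented names; the real pass works on llvm::Type values and also compares parameter counts, varargs-ness, and return types before deciding whether any wrapper is needed at all.

#include <cassert>

// Illustrative only: per-parameter outcomes that mirror the policy described
// in the comment above CreateWrapper. The enum and helper names are invented.
enum class WasmParamKind { Pointer, I32, I64, F32, F64, Struct };

enum class ParamOutcome {
  ExactMatch,        // same type: nothing to do
  TriviallyCastable, // i32 vs pointer: compatible at the wasm level
  StructUnknown,     // struct involved: equivalence unknown, skip the wrapper
  Invalid            // known mismatch: wrapper would just contain unreachable
};

// i32 and pointers are interchangeable at the wasm level.
static bool triviallyCastable(WasmParamKind A, WasmParamKind B) {
  auto I32Like = [](WasmParamKind K) {
    return K == WasmParamKind::Pointer || K == WasmParamKind::I32;
  };
  return I32Like(A) && I32Like(B);
}

static ParamOutcome classifyParam(WasmParamKind Expected, WasmParamKind Got) {
  if (Expected == Got)
    return ParamOutcome::ExactMatch;
  if (triviallyCastable(Expected, Got))
    return ParamOutcome::TriviallyCastable;
  if (Expected == WasmParamKind::Struct || Got == WasmParamKind::Struct)
    return ParamOutcome::StructUnknown;
  return ParamOutcome::Invalid;
}

int main() {
  assert(classifyParam(WasmParamKind::I32, WasmParamKind::Pointer) ==
         ParamOutcome::TriviallyCastable);
  assert(classifyParam(WasmParamKind::F64, WasmParamKind::I32) ==
         ParamOutcome::Invalid);
  return 0;
}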
bool FixFunctionBitcasts::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** Fix Function Bitcasts **********\n");
+
Function *Main = nullptr;
CallInst *CallMain = nullptr;
SmallVector<std::pair<Use *, Function *>, 0> Uses;
@@ -166,19 +246,17 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (!TemporaryWorkarounds && !F.isDeclaration() && F.getName() == "main") {
Main = &F;
LLVMContext &C = M.getContext();
- Type *MainArgTys[] = {
- PointerType::get(Type::getInt8PtrTy(C), 0),
- Type::getInt32Ty(C)
- };
+ Type *MainArgTys[] = {Type::getInt32Ty(C),
+ PointerType::get(Type::getInt8PtrTy(C), 0)};
FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys,
/*isVarArg=*/false);
if (F.getFunctionType() != MainTy) {
- Value *Args[] = {
- UndefValue::get(MainArgTys[0]),
- UndefValue::get(MainArgTys[1])
- };
- Value *Casted = ConstantExpr::getBitCast(Main,
- PointerType::get(MainTy, 0));
+ LLVM_DEBUG(dbgs() << "Found `main` function with incorrect type: "
+ << *F.getFunctionType() << "\n");
+ Value *Args[] = {UndefValue::get(MainArgTys[0]),
+ UndefValue::get(MainArgTys[1])};
+ Value *Casted =
+ ConstantExpr::getBitCast(Main, PointerType::get(MainTy, 0));
CallMain = CallInst::Create(Casted, Args, "call_main");
Use *UseMain = &CallMain->getOperandUse(2);
Uses.push_back(std::make_pair(UseMain, &F));
@@ -200,11 +278,6 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (!Ty)
continue;
- // Bitcasted vararg functions occur in Emscripten's implementation of
- // EM_ASM, so suppress wrappers for them for now.
- if (TemporaryWorkarounds && (Ty->isVarArg() || F->isVarArg()))
- continue;
-
auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
if (Pair.second)
Pair.first->second = CreateWrapper(F, Ty);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index bea027be7711..108f2879a071 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements a pass that transforms irreducible control flow
-/// into reducible control flow. Irreducible control flow means multiple-entry
+/// This file implements a pass that transforms irreducible control flow into
+/// reducible control flow. Irreducible control flow means multiple-entry
/// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo
/// due to being unnatural.
///
@@ -17,12 +17,36 @@
/// it linearizes control flow, turning diamonds into two triangles, which is
/// both unnecessary and undesirable for WebAssembly.
///
-/// TODO: The transformation implemented here handles all irreducible control
-/// flow, without exponential code-size expansion, though it does so by creating
-/// inefficient code in many cases. Ideally, we should add other
-/// transformations, including code-duplicating cases, which can be more
-/// efficient in common cases, and they can fall back to this conservative
-/// implementation as needed.
+/// The big picture: Ignoring natural loops (seeing them monolithically), we
+/// find all the blocks which can return to themselves ("loopers"). Loopers
+/// reachable from the non-loopers are loop entries: if there are 2 or more,
+/// then we have irreducible control flow. We fix that as follows: a new block
+/// is created that can dispatch to each of the loop entries, based on the
+/// value of a label "helper" variable, and we replace direct branches to the
+/// entries with assignments to the label variable and a branch to the dispatch
+/// block. Then the dispatch block is the single entry in a new natural loop.
+///
+/// This is similar to what the Relooper [1] does: both identify looping code
+/// that requires multiple entries and resolve it in a similar way. In
+/// Relooper terminology, we implement a Multiple shape in a Loop shape. Note
+/// also that like the Relooper, we implement a "minimal" intervention: we only
+/// use the "label" helper for the blocks we absolutely must and no others. We
+/// also prioritize code size and do not perform node splitting (i.e. we don't
+/// duplicate code in order to resolve irreducibility).
+///
+/// The difference between this code and the Relooper is that the Relooper also
+/// generates ifs and loops and works in a recursive manner, knowing at each
+/// point what the entries are, and recursively breaks down the problem. Here
+/// we just want to resolve irreducible control flow, and we also want to use
+/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to
+/// identify natural loops, etc., and we start with the whole CFG and must
+/// identify both the looping code and its entries.
+///
+/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
+/// Proceedings of the ACM international conference companion on Object oriented
+/// programming systems languages and applications companion (SPLASH '11). ACM,
+/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
+/// http://doi.acm.org/10.1145/2048147.2048224
///
//===----------------------------------------------------------------------===//
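The label-helper rewrite described in this comment can be pictured at the source level. The following C++ sketch uses invented names and a switch in place of the br_table that the pass actually emits over MachineBasicBlocks; it only illustrates the shape of the transformation.

// Before: an irreducible cycle with two entries reachable from outside it.
void before(bool takeA) {
  if (takeA)
    goto entryA;
  else
    goto entryB;
entryA:
  /* body of entry A */
  goto entryB;
entryB:
  /* body of entry B */
  if (false)
    goto entryA; // back edge that gives the cycle a second entry
}

// After: a single natural loop whose only entry is the dispatch block; the
// old branches to the entries become assignments to the label helper plus a
// branch back to the dispatch.
void after(bool takeA) {
  int labelHelper = takeA ? 0 : 1; // which entry to start in
  while (true) {                   // the new single-entry natural loop
    switch (labelHelper) {         // the dispatch block (a br_table in wasm)
    case 0: /* body of entry A */
      labelHelper = 1;
      continue;
    case 1: /* body of entry B */
      if (false) {                 // the former back edge to entry A
        labelHelper = 0;
        continue;
      }
      return;
    }
  }
}

int main() {
  before(true);
  after(true);
  return 0;
}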
@@ -46,141 +70,203 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
namespace {
-class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
- StringRef getPassName() const override {
- return "WebAssembly Fix Irreducible Control Flow";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- bool VisitLoop(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop);
-
-public:
- static char ID; // Pass identification, replacement for typeid
- WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
-};
-} // end anonymous namespace
-
-char WebAssemblyFixIrreducibleControlFlow::ID = 0;
-INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
- "Removes irreducible control flow", false, false)
-
-FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
- return new WebAssemblyFixIrreducibleControlFlow();
-}
-
-namespace {
-
-/// A utility for walking the blocks of a loop, handling a nested inner
-/// loop as a monolithic conceptual block.
-class MetaBlock {
- MachineBasicBlock *Block;
- SmallVector<MachineBasicBlock *, 2> Preds;
- SmallVector<MachineBasicBlock *, 2> Succs;
+class LoopFixer {
public:
- explicit MetaBlock(MachineBasicBlock *MBB)
- : Block(MBB), Preds(MBB->pred_begin(), MBB->pred_end()),
- Succs(MBB->succ_begin(), MBB->succ_end()) {}
-
- explicit MetaBlock(MachineLoop *Loop) : Block(Loop->getHeader()) {
- Loop->getExitBlocks(Succs);
- for (MachineBasicBlock *Pred : Block->predecessors())
- if (!Loop->contains(Pred))
- Preds.push_back(Pred);
+ LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop)
+ : MF(MF), MLI(MLI), Loop(Loop) {}
+
+ // Run the fixer on the given inputs. Returns whether changes were made.
+ bool run();
+
+private:
+ MachineFunction &MF;
+ MachineLoopInfo &MLI;
+ MachineLoop *Loop;
+
+ MachineBasicBlock *Header;
+ SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks;
+
+ using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
+ DenseMap<MachineBasicBlock *, BlockSet> Reachable;
+
+ // The worklist contains pairs of recent additions, (a, b), where we just
+ // added a link a => b.
+ using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
+ SmallVector<BlockPair, 4> WorkList;
+
+  // Get a canonical block to represent a block or a loop: the block, or if in
+  // an inner loop, the loop header; or if in an outer loop scope, we can
+  // ignore it. We need to call this on all blocks we work on.
+ MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) {
+ MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
+ if (InnerLoop == Loop) {
+ return MBB;
+ } else {
+ // This is either in an outer or an inner loop, and not in ours.
+ if (!LoopBlocks.count(MBB)) {
+ // It's in outer code, ignore it.
+ return nullptr;
+ }
+ assert(InnerLoop);
+ // It's in an inner loop, canonicalize it to the header of that loop.
+ return InnerLoop->getHeader();
+ }
}
- MachineBasicBlock *getBlock() const { return Block; }
-
- const SmallVectorImpl<MachineBasicBlock *> &predecessors() const {
- return Preds;
- }
- const SmallVectorImpl<MachineBasicBlock *> &successors() const {
- return Succs;
+  // For a successor we can additionally ignore a branch back to the natural
+  // loop top: when we are in the scope of a loop, we only care about internal
+  // irreducibility and can ignore the loop we are in. We need to call this on
+  // all blocks in a context where they are used as a successor.
+ MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) {
+ if (Loop && MBB == Loop->getHeader()) {
+ // Ignore branches going to the loop's natural header.
+ return nullptr;
+ }
+ return canonicalize(MBB);
}
- bool operator==(const MetaBlock &MBB) { return Block == MBB.Block; }
- bool operator!=(const MetaBlock &MBB) { return Block != MBB.Block; }
+ // Potentially insert a new reachable edge, and if so, note it as further
+ // work.
+ void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ assert(MBB == canonicalize(MBB));
+ assert(Succ);
+    // Succ may not be interesting as a successor.
+ Succ = canonicalizeSuccessor(Succ);
+ if (!Succ)
+ return;
+ if (Reachable[MBB].insert(Succ).second) {
+      // For there to be further work, we must have
+      // X => MBB => Succ
+      // for some other X, and in that case X => Succ would be a new edge for
+      // us to discover later. However, if we don't care about MBB as a
+      // successor, then we don't care about that anyhow.
+ if (canonicalizeSuccessor(MBB)) {
+ WorkList.emplace_back(MBB, Succ);
+ }
+ }
+ }
};
-class SuccessorList final : public MetaBlock {
- size_t Index;
- size_t Num;
+bool LoopFixer::run() {
+ Header = Loop ? Loop->getHeader() : &*MF.begin();
-public:
- explicit SuccessorList(MachineBasicBlock *MBB)
- : MetaBlock(MBB), Index(0), Num(successors().size()) {}
+ // Identify all the blocks in this loop scope.
+ if (Loop) {
+ for (auto *MBB : Loop->getBlocks()) {
+ LoopBlocks.insert(MBB);
+ }
+ } else {
+ for (auto &MBB : MF) {
+ LoopBlocks.insert(&MBB);
+ }
+ }
- explicit SuccessorList(MachineLoop *Loop)
- : MetaBlock(Loop), Index(0), Num(successors().size()) {}
+ // Compute which (canonicalized) blocks each block can reach.
- bool HasNext() const { return Index != Num; }
+ // Add all the initial work.
+ for (auto *MBB : LoopBlocks) {
+ MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
- MachineBasicBlock *Next() {
- assert(HasNext());
- return successors()[Index++];
+ if (InnerLoop == Loop) {
+ for (auto *Succ : MBB->successors()) {
+ maybeInsert(MBB, Succ);
+ }
+ } else {
+ // It can't be in an outer loop - we loop on LoopBlocks - and so it must
+ // be an inner loop.
+ assert(InnerLoop);
+ // Check if we are the canonical block for this loop.
+ if (canonicalize(MBB) != MBB) {
+ continue;
+ }
+ // The successors are those of the loop.
+ SmallVector<MachineBasicBlock *, 2> ExitBlocks;
+ InnerLoop->getExitBlocks(ExitBlocks);
+ for (auto *Succ : ExitBlocks) {
+ maybeInsert(MBB, Succ);
+ }
+ }
}
-};
-} // end anonymous namespace
+ // Do work until we are all done.
+ while (!WorkList.empty()) {
+ MachineBasicBlock *MBB;
+ MachineBasicBlock *Succ;
+ std::tie(MBB, Succ) = WorkList.pop_back_val();
+ // The worklist item is an edge we just added, so it must have valid blocks
+ // (and not something canonicalized to nullptr).
+ assert(MBB);
+ assert(Succ);
+ // The successor in that pair must also be a valid successor.
+ assert(MBB == canonicalizeSuccessor(MBB));
+ // We recently added MBB => Succ, and that means we may have enabled
+ // Pred => MBB => Succ. Check all the predecessors. Note that our loop here
+ // is correct for both a block and a block representing a loop, as the loop
+ // is natural and so the predecessors are all predecessors of the loop
+ // header, which is the block we have here.
+ for (auto *Pred : MBB->predecessors()) {
+ // Canonicalize, make sure it's relevant, and check it's not the same
+ // block (an update to the block itself doesn't help compute that same
+ // block).
+ Pred = canonicalize(Pred);
+ if (Pred && Pred != MBB) {
+ maybeInsert(Pred, Succ);
+ }
+ }
+ }
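The worklist above propagates reachability incrementally: adding an edge MBB => Succ may enable Pred => Succ for each predecessor of MBB. Here is a standalone sketch of the same scheme on a plain integer graph; it omits the inner-loop canonicalization described above and uses made-up data.

#include <map>
#include <set>
#include <utility>
#include <vector>

int main() {
  // Predecessor lists for the edges 0 -> 1 -> 2 -> 0.
  std::map<int, std::vector<int>> Preds = {{1, {0}}, {2, {1}}, {0, {2}}};
  std::map<int, std::set<int>> Reachable;
  std::vector<std::pair<int, int>> WorkList;

  // Insert a reachability fact and, if it is new, queue it for propagation.
  auto MaybeInsert = [&](int From, int To) {
    if (Reachable[From].insert(To).second)
      WorkList.emplace_back(From, To);
  };

  // Seed with the direct edges.
  MaybeInsert(0, 1);
  MaybeInsert(1, 2);
  MaybeInsert(2, 0);

  // Each new fact From => To means every predecessor P of From now has
  // P => From => To, so P => To may be a new fact as well.
  while (!WorkList.empty()) {
    auto [From, To] = WorkList.back();
    WorkList.pop_back();
    for (int P : Preds[From])
      MaybeInsert(P, To);
  }

  // Every node in this cycle can now reach itself, i.e. each is a "looper".
  bool AllLoopers = Reachable[0].count(0) && Reachable[1].count(1) &&
                    Reachable[2].count(2);
  return AllLoopers ? 0 : 1;
}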
-bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
- MachineLoopInfo &MLI,
- MachineLoop *Loop) {
- MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin();
- SetVector<MachineBasicBlock *> RewriteSuccs;
-
- // DFS through Loop's body, looking for irreducible control flow. Loop is
- // natural, and we stay in its body, and we treat any nested loops
- // monolithically, so any cycles we encounter indicate irreducibility.
- SmallPtrSet<MachineBasicBlock *, 8> OnStack;
- SmallPtrSet<MachineBasicBlock *, 8> Visited;
- SmallVector<SuccessorList, 4> LoopWorklist;
- LoopWorklist.push_back(SuccessorList(Header));
- OnStack.insert(Header);
- Visited.insert(Header);
- while (!LoopWorklist.empty()) {
- SuccessorList &Top = LoopWorklist.back();
- if (Top.HasNext()) {
- MachineBasicBlock *Next = Top.Next();
- if (Next == Header || (Loop && !Loop->contains(Next)))
- continue;
- if (LLVM_LIKELY(OnStack.insert(Next).second)) {
- if (!Visited.insert(Next).second) {
- OnStack.erase(Next);
- continue;
- }
- MachineLoop *InnerLoop = MLI.getLoopFor(Next);
- if (InnerLoop != Loop)
- LoopWorklist.push_back(SuccessorList(InnerLoop));
- else
- LoopWorklist.push_back(SuccessorList(Next));
- } else {
- RewriteSuccs.insert(Top.getBlock());
+ // It's now trivial to identify the loopers.
+ SmallPtrSet<MachineBasicBlock *, 4> Loopers;
+ for (auto MBB : LoopBlocks) {
+ if (Reachable[MBB].count(MBB)) {
+ Loopers.insert(MBB);
+ }
+ }
+ // The header cannot be a looper. At the toplevel, LLVM does not allow the
+ // entry to be in a loop, and in a natural loop we should ignore the header.
+ assert(Loopers.count(Header) == 0);
+
+ // Find the entries, loopers reachable from non-loopers.
+ SmallPtrSet<MachineBasicBlock *, 4> Entries;
+ SmallVector<MachineBasicBlock *, 4> SortedEntries;
+ for (auto *Looper : Loopers) {
+ for (auto *Pred : Looper->predecessors()) {
+ Pred = canonicalize(Pred);
+ if (Pred && !Loopers.count(Pred)) {
+ Entries.insert(Looper);
+ SortedEntries.push_back(Looper);
+ break;
}
- continue;
}
- OnStack.erase(Top.getBlock());
- LoopWorklist.pop_back();
}
- // Most likely, we didn't find any irreducible control flow.
- if (LLVM_LIKELY(RewriteSuccs.empty()))
+ // Check if we found irreducible control flow.
+ if (LLVM_LIKELY(Entries.size() <= 1))
return false;
- LLVM_DEBUG(dbgs() << "Irreducible control flow detected!\n");
+ // Sort the entries to ensure a deterministic build.
+ llvm::sort(SortedEntries,
+ [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+ auto ANum = A->getNumber();
+ auto BNum = B->getNumber();
+ return ANum < BNum;
+ });
+
+#ifndef NDEBUG
+ for (auto Block : SortedEntries)
+ assert(Block->getNumber() != -1);
+ if (SortedEntries.size() > 1) {
+ for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1;
+ I != E; ++I) {
+ auto ANum = (*I)->getNumber();
+ auto BNum = (*(std::next(I)))->getNumber();
+ assert(ANum != BNum);
+ }
+ }
+#endif
- // Ok. We have irreducible control flow! Create a dispatch block which will
- // contains a jump table to any block in the problematic set of blocks.
+ // Create a dispatch block which will contain a jump table to the entries.
MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
MF.insert(MF.end(), Dispatch);
MLI.changeLoopFor(Dispatch, Loop);
@@ -196,43 +282,43 @@ bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
MIB.addReg(Reg);
- // Collect all the blocks which need to have their successors rewritten,
- // add the successors to the jump table, and remember their index.
+  // Compute the jump table indices in the dispatch block, one for each entry,
+  // and add the entries as its successors.
DenseMap<MachineBasicBlock *, unsigned> Indices;
- SmallVector<MachineBasicBlock *, 4> SuccWorklist(RewriteSuccs.begin(),
- RewriteSuccs.end());
- while (!SuccWorklist.empty()) {
- MachineBasicBlock *MBB = SuccWorklist.pop_back_val();
+ for (auto *MBB : SortedEntries) {
auto Pair = Indices.insert(std::make_pair(MBB, 0));
- if (!Pair.second)
+ if (!Pair.second) {
continue;
+ }
unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
- LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index
- << "\n");
-
Pair.first->second = Index;
- for (auto Pred : MBB->predecessors())
- RewriteSuccs.insert(Pred);
MIB.addMBB(MBB);
Dispatch->addSuccessor(MBB);
+ }
- MetaBlock Meta(MBB);
- for (auto *Succ : Meta.successors())
- if (Succ != Header && (!Loop || Loop->contains(Succ)))
- SuccWorklist.push_back(Succ);
+  // Rewrite the problematic successors for every block that wants to reach an
+  // entry. For simplicity, we just introduce a new block for every edge we
+  // need to rewrite. (Fancier things are possible.)
+
+ SmallVector<MachineBasicBlock *, 4> AllPreds;
+ for (auto *MBB : SortedEntries) {
+ for (auto *Pred : MBB->predecessors()) {
+ if (Pred != Dispatch) {
+ AllPreds.push_back(Pred);
+ }
+ }
}
- // Rewrite the problematic successors for every block in RewriteSuccs.
- // For simplicity, we just introduce a new block for every edge we need to
- // rewrite. Fancier things are possible.
- for (MachineBasicBlock *MBB : RewriteSuccs) {
+ for (MachineBasicBlock *MBB : AllPreds) {
DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
for (auto *Succ : MBB->successors()) {
- if (!Indices.count(Succ))
+ if (!Entries.count(Succ)) {
continue;
+ }
+ // This is a successor we need to rewrite.
MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
: MF.end(),
@@ -266,6 +352,55 @@ bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
return true;
}
+class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
+ StringRef getPassName() const override {
+ return "WebAssembly Fix Irreducible Control Flow";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) {
+ // Visit the function body, which is identified as a null loop.
+ if (LoopFixer(MF, MLI, nullptr).run()) {
+ return true;
+ }
+
+ // Visit all the loops.
+ SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
+ while (!Worklist.empty()) {
+ MachineLoop *Loop = Worklist.pop_back_val();
+ Worklist.append(Loop->begin(), Loop->end());
+ if (LoopFixer(MF, MLI, Loop).run()) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyFixIrreducibleControlFlow::ID = 0;
+INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
+ "Removes irreducible control flow", false, false)
+
+FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
+ return new WebAssemblyFixIrreducibleControlFlow();
+}
+
bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
@@ -275,24 +410,19 @@ bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
bool Changed = false;
auto &MLI = getAnalysis<MachineLoopInfo>();
- // Visit the function body, which is identified as a null loop.
- Changed |= VisitLoop(MF, MLI, nullptr);
-
- // Visit all the loops.
- SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
- while (!Worklist.empty()) {
- MachineLoop *CurLoop = Worklist.pop_back_val();
- Worklist.append(CurLoop->begin(), CurLoop->end());
- Changed |= VisitLoop(MF, MLI, CurLoop);
- }
-
- // If we made any changes, completely recompute everything.
- if (LLVM_UNLIKELY(Changed)) {
- LLVM_DEBUG(dbgs() << "Recomputing dominators and loops.\n");
+  // When we modify something, bail out and recompute MLI, then start again, as
+  // we create a new natural loop when we resolve irreducible control flow, and
+  // other loops may become nested in it, etc. In practice this is not an issue
+  // because irreducible control flow is rare; only a few iterations are needed
+  // here.
+ while (LLVM_UNLIKELY(runIteration(MF, MLI))) {
+ // We rewrote part of the function; recompute MLI and start again.
+ LLVM_DEBUG(dbgs() << "Recomputing loops.\n");
MF.getRegInfo().invalidateLiveness();
MF.RenumberBlocks();
getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
MLI.runOnMachineFunction(MF);
+ Changed = true;
}
return Changed;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 052c94e9d6a9..2d5aff28d27b 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -42,8 +43,7 @@ using namespace llvm;
/// require stricter alignment than the stack pointer itself. Because we need
/// to shift the stack pointer by some unknown amount to force the alignment,
/// we need to record the value of the stack pointer on entry to the function.
-bool WebAssemblyFrameLowering::hasBP(
- const MachineFunction &MF) const {
+bool WebAssemblyFrameLowering::hasBP(const MachineFunction &MF) const {
const auto *RegInfo =
MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
return RegInfo->needsStackRealignment(MF);
@@ -78,36 +78,60 @@ bool WebAssemblyFrameLowering::hasReservedCallFrame(
return !MF.getFrameInfo().hasVarSizedObjects();
}
+// Returns true if this function needs a local user-space stack pointer for its
+// local frame (not for exception handling).
+bool WebAssemblyFrameLowering::needsSPForLocalFrame(
+ const MachineFunction &MF) const {
+ auto &MFI = MF.getFrameInfo();
+ return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF);
+}
+
+// In functions with EH pads, we need to make a copy of the value of the
+// __stack_pointer global in the SP32 register, in order to use it when
+// restoring __stack_pointer after an exception is caught.
+bool WebAssemblyFrameLowering::needsPrologForEH(
+ const MachineFunction &MF) const {
+ auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType();
+ return EHType == ExceptionHandling::Wasm &&
+ MF.getFunction().hasPersonalityFn() && MF.getFrameInfo().hasCalls();
+}
/// Returns true if this function needs a local user-space stack pointer.
/// Unlike a machine stack pointer, the wasm user stack pointer is a global
/// variable, so it is loaded into a register in the prolog.
-bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF,
- const MachineFrameInfo &MFI) const {
- return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF);
+bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF) const {
+ return needsSPForLocalFrame(MF) || needsPrologForEH(MF);
}
/// Returns true if the local user-space stack pointer needs to be written back
-/// to memory by this function (this is not meaningful if needsSP is false). If
-/// false, the stack red zone can be used and only a local SP is needed.
+/// to __stack_pointer global by this function (this is not meaningful if
+/// needsSP is false). If false, the stack red zone can be used and only a local
+/// SP is needed.
bool WebAssemblyFrameLowering::needsSPWriteback(
- const MachineFunction &MF, const MachineFrameInfo &MFI) const {
- assert(needsSP(MF, MFI));
- return MFI.getStackSize() > RedZoneSize || MFI.hasCalls() ||
- MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
+ const MachineFunction &MF) const {
+ auto &MFI = MF.getFrameInfo();
+ assert(needsSP(MF));
+  // When we need a local stack pointer only to support EH, and not for the
+  // local frame itself, we don't need to write SP back in the epilog, because
+  // we don't bump down the stack pointer in the prolog. We need to write SP
+  // back in the epilog only if
+  // 1. We need SP not only for EH support but also because we actually use
+  //    the stack or have the frame address taken.
+  // 2. We cannot use the red zone.
+ bool CanUseRedZone = MFI.getStackSize() <= RedZoneSize && !MFI.hasCalls() &&
+ !MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
+ return needsSPForLocalFrame(MF) && !CanUseRedZone;
}
-static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &InsertAddr,
- MachineBasicBlock::iterator &InsertStore,
- const DebugLoc &DL) {
+void WebAssemblyFrameLowering::writeSPToGlobal(
+ unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const {
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
- BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::SET_GLOBAL_I32))
- .addExternalSymbol(SPSymbol)
+ BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32))
+ .addExternalSymbol(SPSymbol, WebAssemblyII::MO_SYMBOL_GLOBAL)
.addReg(SrcReg);
}
@@ -119,9 +143,9 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
"Call frame pseudos should only be used for dynamic stack adjustment");
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
- needsSPWriteback(MF, MF.getFrameInfo())) {
+ needsSPWriteback(MF)) {
DebugLoc DL = I->getDebugLoc();
- writeSPToMemory(WebAssembly::SP32, MF, MBB, I, I, DL);
+ writeSPToGlobal(WebAssembly::SP32, MF, MBB, I, DL);
}
return MBB.erase(I);
}
@@ -133,7 +157,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
assert(MFI.getCalleeSavedInfo().empty() &&
"WebAssembly should not have callee-saved registers");
- if (!needsSP(MF, MFI)) return;
+ if (!needsSP(MF))
+ return;
uint64_t StackSize = MFI.getStackSize();
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -152,8 +177,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GET_GLOBAL_I32), SPReg)
- .addExternalSymbol(SPSymbol);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg)
+ .addExternalSymbol(SPSymbol, WebAssemblyII::MO_SYMBOL_GLOBAL);
bool HasBP = hasBP(MF);
if (HasBP) {
@@ -177,7 +202,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC);
unsigned Alignment = MFI.getMaxAlignment();
assert((1u << countTrailingZeros(Alignment)) == Alignment &&
- "Alignment must be a power of 2");
+ "Alignment must be a power of 2");
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg)
.addImm((int)~(Alignment - 1));
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32),
@@ -189,20 +214,19 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
// Unlike most conventional targets (where FP points to the saved FP),
// FP points to the bottom of the fixed-size locals, so we can use positive
// offsets in load/store instructions.
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY),
- WebAssembly::FP32)
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32)
.addReg(WebAssembly::SP32);
}
- if (StackSize && needsSPWriteback(MF, MFI)) {
- writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL);
+ if (StackSize && needsSPWriteback(MF)) {
+ writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPt, DL);
}
}
void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- auto &MFI = MF.getFrameInfo();
- uint64_t StackSize = MFI.getStackSize();
- if (!needsSP(MF, MFI) || !needsSPWriteback(MF, MFI)) return;
+ uint64_t StackSize = MF.getFrameInfo().getStackSize();
+ if (!needsSP(MF) || !needsSPWriteback(MF))
+ return;
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
auto &MRI = MF.getRegInfo();
auto InsertPt = MBB.getFirstTerminator();
@@ -214,7 +238,6 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
// Restore the stack pointer. If we had fixed-size locals, add the offset
// subtracted in the prolog.
unsigned SPReg = 0;
- MachineBasicBlock::iterator InsertAddr = InsertPt;
if (hasBP(MF)) {
auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
SPReg = FI->getBasePointerVreg();
@@ -222,9 +245,8 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
- InsertAddr =
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
- .addImm(StackSize);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addImm(StackSize);
// In the epilog we don't need to write the result back to the SP32 physreg
// because it won't be used again. We can use a stackified register instead.
SPReg = MRI.createVirtualRegister(PtrRC);
@@ -235,5 +257,5 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
}
- writeSPToMemory(SPReg, MF, MBB, InsertAddr, InsertPt, DL);
+ writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL);
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index fe23e418a3f1..c6fa8261b03f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -22,9 +22,10 @@ namespace llvm {
class MachineFrameInfo;
class WebAssemblyFrameLowering final : public TargetFrameLowering {
- public:
+public:
/// Size of the red zone for the user stack (leaf functions can use this much
- /// space below the stack pointer without writing it back to memory).
+ /// space below the stack pointer without writing it back to __stack_pointer
+ /// global).
// TODO: (ABI) Revisit and decide how large it should be.
static const size_t RedZoneSize = 128;
@@ -34,9 +35,9 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering {
/*TransientStackAlignment=*/16,
/*StackRealignable=*/true) {}
- MachineBasicBlock::iterator eliminateCallFramePseudoInstr(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
/// These methods insert prolog and epilog code into the function.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
@@ -45,13 +46,21 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering {
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- private:
+ bool needsPrologForEH(const MachineFunction &MF) const;
+
+ /// Write SP back to __stack_pointer global.
+ void writeSPToGlobal(unsigned SrcReg, MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &InsertStore,
+ const DebugLoc &DL) const;
+
+private:
bool hasBP(const MachineFunction &MF) const;
- bool needsSP(const MachineFunction &MF, const MachineFrameInfo &MFI) const;
- bool needsSPWriteback(const MachineFunction &MF,
- const MachineFrameInfo &MFI) const;
+ bool needsSPForLocalFrame(const MachineFunction &MF) const;
+ bool needsSP(const MachineFunction &MF) const;
+ bool needsSPWriteback(const MachineFunction &MF) const;
};
-} // end namespace llvm
+} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index c12550feabbb..e987d7f7f43a 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -21,5 +21,10 @@ HANDLE_NODETYPE(ARGUMENT)
HANDLE_NODETYPE(Wrapper)
HANDLE_NODETYPE(BR_IF)
HANDLE_NODETYPE(BR_TABLE)
+HANDLE_NODETYPE(SHUFFLE)
+HANDLE_NODETYPE(VEC_SHL)
+HANDLE_NODETYPE(VEC_SHR_S)
+HANDLE_NODETYPE(VEC_SHR_U)
+HANDLE_NODETYPE(THROW)
// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index fdf3a30a5c0e..0a7464cedc90 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -48,6 +48,10 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ LLVM_DEBUG(dbgs() << "********** ISelDAGToDAG **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
ForCodeSize = MF.getFunction().hasFnAttribute(Attribute::OptimizeForSize) ||
MF.getFunction().hasFnAttribute(Attribute::MinSize);
Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 283e703e1f6c..003848e34227 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -21,8 +21,10 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
@@ -42,6 +44,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Booleans always contain 0 or 1.
setBooleanContents(ZeroOrOneBooleanContent);
+ // Except in SIMD vectors
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// WebAssembly does not produce floating-point exceptions on normal floating
// point operations.
setHasFloatingPointExceptions(false);
@@ -60,6 +64,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
+ addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
+ }
}
// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -77,7 +85,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
- for (auto T : {MVT::f32, MVT::f64}) {
+ for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
// Don't expand the floating-point types to constant pools.
setOperationAction(ISD::ConstantFP, T, Legal);
// Expand floating-point comparisons.
@@ -85,17 +93,17 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
setCondCodeAction(CC, T, Expand);
// Expand floating-point library function operators.
- for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
- ISD::FMA})
+ for (auto Op :
+ {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
setOperationAction(Op, T, Expand);
// Note supported floating-point library function operators that otherwise
// default to expand.
for (auto Op :
{ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
setOperationAction(Op, T, Legal);
- // Support minnan and maxnan, which otherwise default to expand.
- setOperationAction(ISD::FMINNAN, T, Legal);
- setOperationAction(ISD::FMAXNAN, T, Legal);
+ // Support minimum and maximum, which otherwise default to expand.
+ setOperationAction(ISD::FMINIMUM, T, Legal);
+ setOperationAction(ISD::FMAXIMUM, T, Legal);
// WebAssembly currently has no builtin f16 support.
setOperationAction(ISD::FP16_TO_FP, T, Expand);
setOperationAction(ISD::FP_TO_FP16, T, Expand);
@@ -103,24 +111,75 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setTruncStoreAction(T, MVT::f16, Expand);
}
- for (auto T : {MVT::i32, MVT::i64}) {
- // Expand unavailable integer operations.
- for (auto Op :
- {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
- ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS,
- ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC,
- ISD::SUBE}) {
+ // Support saturating add for i8x16 and i16x8
+ if (Subtarget->hasSIMD128())
+ for (auto T : {MVT::v16i8, MVT::v8i16})
+ for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
+ setOperationAction(Op, T, Legal);
+
+ // Expand unavailable integer operations.
+ for (auto Op :
+ {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
+ ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
+ ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
+ for (auto T : {MVT::i32, MVT::i64}) {
setOperationAction(Op, T, Expand);
}
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
+ setOperationAction(Op, T, Expand);
+ }
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ setOperationAction(Op, MVT::v2i64, Expand);
+ }
+ }
+ }
+
+ // There is no i64x2.mul instruction
+ setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+
+ // We have custom shuffle lowering to expose the shuffle mask
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
+ setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
+ }
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ }
+ }
+
+ // Custom lowering since wasm shifts must have a scalar shift amount
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+ setOperationAction(Op, T, Custom);
+ if (Subtarget->hasUnimplementedSIMD128())
+ for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+ setOperationAction(Op, MVT::v2i64, Custom);
}
+ // There are no select instructions for vectors
+ if (Subtarget->hasSIMD128())
+ for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+ setOperationAction(Op, T, Expand);
+ if (Subtarget->hasUnimplementedSIMD128())
+ for (auto T : {MVT::v2i64, MVT::v2f64})
+ setOperationAction(Op, T, Expand);
+ }
+
// As a special case, these operators use the type to mean the type to
// sign-extend from.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!Subtarget->hasSignExt()) {
+ // Sign extends are legal only when extending a vector extract
+ auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
for (auto T : {MVT::i8, MVT::i16, MVT::i32})
- setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
}
+ for (auto T : MVT::integer_vector_valuetypes())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
// Dynamic stack allocation: use the default expansion.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
@@ -142,21 +201,72 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// - Floating-point extending loads.
// - Floating-point truncating stores.
// - i1 extending loads.
+ // - extending/truncating SIMD loads/stores
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
for (auto T : MVT::integer_valuetypes())
for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
setLoadExtAction(Ext, T, MVT::i1, Promote);
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
+ MVT::v2f64}) {
+ for (auto MemT : MVT::vector_valuetypes()) {
+ if (MVT(T) != MemT) {
+ setTruncStoreAction(T, MemT, Expand);
+ for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
+ setLoadExtAction(Ext, T, MemT, Expand);
+ }
+ }
+ }
+ }
+
+ // Expand additional SIMD ops that V8 hasn't implemented yet
+ if (Subtarget->hasSIMD128() && !Subtarget->hasUnimplementedSIMD128()) {
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ }
+
+ // Custom lower lane accesses to expand out variable indices
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
+ }
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ for (auto T : {MVT::v2i64, MVT::v2f64}) {
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
+ }
+ }
+ }
// Trap lowers to wasm unreachable
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// Exception handling intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setMaxAtomicSizeInBitsSupported(64);
}
+TargetLowering::AtomicExpansionKind
+WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ // We have wasm instructions for these
+ switch (AI->getOperation()) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ case AtomicRMWInst::Xchg:
+ return AtomicExpansionKind::None;
+ default:
+ break;
+ }
+ return AtomicExpansionKind::CmpXChg;
+}
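For operations not listed above, returning AtomicExpansionKind::CmpXChg asks LLVM's generic AtomicExpand pass to rewrite the RMW as a compare-exchange loop. Roughly, and only as an illustration (the real rewrite happens on IR, and "nand" is just a convenient example of an operation the switch does not handle), the expansion looks like this in C++:

#include <atomic>
#include <cstdint>

// Illustrative only: an unsupported RMW operation expanded by hand into a
// compare-exchange loop, mirroring what the CmpXChg expansion produces.
uint32_t atomic_nand(std::atomic<uint32_t> &A, uint32_t Operand) {
  uint32_t Old = A.load();
  uint32_t New;
  do {
    New = ~(Old & Operand);                     // the operation itself
  } while (!A.compare_exchange_weak(Old, New)); // retry until no one raced us
  return Old;                                   // an RMW yields the old value
}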
+
FastISel *WebAssemblyTargetLowering::createFastISel(
FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
return WebAssembly::createFastISel(FuncInfo, LibInfo);
@@ -171,7 +281,8 @@ bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
EVT VT) const {
unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
- if (BitWidth > 1 && BitWidth < 8) BitWidth = 8;
+ if (BitWidth > 1 && BitWidth < 8)
+ BitWidth = 8;
if (BitWidth > 64) {
// The shift will be lowered to a libcall, and compiler-rt libcalls expect
@@ -190,17 +301,11 @@ MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
-static MachineBasicBlock *
-LowerFPToInt(
- MachineInstr &MI,
- DebugLoc DL,
- MachineBasicBlock *BB,
- const TargetInstrInfo &TII,
- bool IsUnsigned,
- bool Int64,
- bool Float64,
- unsigned LoweredOpcode
-) {
+static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *BB,
+ const TargetInstrInfo &TII,
+ bool IsUnsigned, bool Int64,
+ bool Float64, unsigned LoweredOpcode) {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned OutReg = MI.getOperand(0).getReg();
@@ -232,8 +337,7 @@ LowerFPToInt(
// Transfer the remainder of BB and its successor edges to DoneMBB.
DoneMBB->splice(DoneMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(TrueMBB);
@@ -255,45 +359,33 @@ LowerFPToInt(
if (IsUnsigned) {
Tmp0 = InReg;
} else {
- BuildMI(BB, DL, TII.get(Abs), Tmp0)
- .addReg(InReg);
+ BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
}
BuildMI(BB, DL, TII.get(FConst), Tmp1)
.addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
- BuildMI(BB, DL, TII.get(LT), CmpReg)
- .addReg(Tmp0)
- .addReg(Tmp1);
+ BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
// For unsigned numbers, we have to do a separate comparison with zero.
if (IsUnsigned) {
Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
- unsigned SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ unsigned SecondCmpReg =
+ MRI.createVirtualRegister(&WebAssembly::I32RegClass);
unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
BuildMI(BB, DL, TII.get(FConst), Tmp1)
.addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
- BuildMI(BB, DL, TII.get(GE), SecondCmpReg)
- .addReg(Tmp0)
- .addReg(Tmp1);
- BuildMI(BB, DL, TII.get(And), AndReg)
- .addReg(CmpReg)
- .addReg(SecondCmpReg);
+ BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
+ BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
CmpReg = AndReg;
}
- BuildMI(BB, DL, TII.get(Eqz), EqzReg)
- .addReg(CmpReg);
+ BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
// Create the CFG diamond to select between doing the conversion or using
// the substitute value.
- BuildMI(BB, DL, TII.get(WebAssembly::BR_IF))
- .addMBB(TrueMBB)
- .addReg(EqzReg);
- BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg)
- .addReg(InReg);
- BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR))
- .addMBB(DoneMBB);
- BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg)
- .addImm(Substitute);
+ BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
+ BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
+ BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
+ BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
.addReg(FalseReg)
.addMBB(FalseMBB)
@@ -303,16 +395,14 @@ LowerFPToInt(
return DoneMBB;
}
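At the source level, the CFG diamond built here amounts to guarding the trapping wasm conversion with a range check and otherwise producing a substitute constant. Below is a scalar sketch for a signed f32 to i32 conversion; the function name and the exact substitute value are assumptions for illustration.

#include <cmath>
#include <cstdint>
#include <limits>

// Illustrative only: the trapping instruction (i32.trunc_f32_s) is executed
// only when the input is known to be in range; otherwise a substitute value
// is produced instead of trapping.
int32_t fpToSIntGuarded(float In) {
  // 2147483648.0f is 2^31, the magnitude bound for a representable result.
  if (std::fabs(In) < 2147483648.0f)
    return static_cast<int32_t>(In); // in range: the conversion cannot trap
  // Out of range or NaN: take the substitute-value path of the diamond.
  return std::numeric_limits<int32_t>::min();
}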
-MachineBasicBlock *
-WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
- MachineInstr &MI,
- MachineBasicBlock *BB
-) const {
+MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unexpected instr type to insert");
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
case WebAssembly::FP_TO_SINT_I32_F32:
return LowerFPToInt(MI, DL, BB, TII, false, false, false,
WebAssembly::I32_TRUNC_S_F32);
@@ -337,17 +427,17 @@ WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
case WebAssembly::FP_TO_UINT_I64_F64:
return LowerFPToInt(MI, DL, BB, TII, true, true, true,
WebAssembly::I64_TRUNC_U_F64);
- llvm_unreachable("Unexpected instruction to emit with custom inserter");
+ llvm_unreachable("Unexpected instruction to emit with custom inserter");
}
}
-const char *WebAssemblyTargetLowering::getTargetNodeName(
- unsigned Opcode) const {
+const char *
+WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
- case WebAssemblyISD::FIRST_NUMBER:
- break;
-#define HANDLE_NODETYPE(NODE) \
- case WebAssemblyISD::NODE: \
+ case WebAssemblyISD::FIRST_NUMBER:
+ break;
+#define HANDLE_NODETYPE(NODE) \
+ case WebAssemblyISD::NODE: \
return "WebAssemblyISD::" #NODE;
#include "WebAssemblyISD.def"
#undef HANDLE_NODETYPE
@@ -362,21 +452,21 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
// WebAssembly register class.
if (Constraint.size() == 1) {
switch (Constraint[0]) {
- case 'r':
- assert(VT != MVT::iPTR && "Pointer MVT not expected here");
- if (Subtarget->hasSIMD128() && VT.isVector()) {
- if (VT.getSizeInBits() == 128)
- return std::make_pair(0U, &WebAssembly::V128RegClass);
- }
- if (VT.isInteger() && !VT.isVector()) {
- if (VT.getSizeInBits() <= 32)
- return std::make_pair(0U, &WebAssembly::I32RegClass);
- if (VT.getSizeInBits() <= 64)
- return std::make_pair(0U, &WebAssembly::I64RegClass);
- }
- break;
- default:
- break;
+ case 'r':
+ assert(VT != MVT::iPTR && "Pointer MVT not expected here");
+ if (Subtarget->hasSIMD128() && VT.isVector()) {
+ if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &WebAssembly::V128RegClass);
+ }
+ if (VT.isInteger() && !VT.isVector()) {
+ if (VT.getSizeInBits() <= 32)
+ return std::make_pair(0U, &WebAssembly::I32RegClass);
+ if (VT.getSizeInBits() <= 64)
+ return std::make_pair(0U, &WebAssembly::I64RegClass);
+ }
+ break;
+ default:
+ break;
}
}
@@ -395,16 +485,17 @@ bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM,
- Type *Ty,
- unsigned AS,
+ Type *Ty, unsigned AS,
Instruction *I) const {
// WebAssembly offsets are added as unsigned without wrapping. The
// isLegalAddressingMode gives us no way to determine if wrapping could be
// happening, so we approximate this by accepting only non-negative offsets.
- if (AM.BaseOffs < 0) return false;
+ if (AM.BaseOffs < 0)
+ return false;
// WebAssembly has no scale register operands.
- if (AM.Scale != 0) return false;
+ if (AM.Scale != 0)
+ return false;
// Everything else is legal.
return true;
@@ -418,7 +509,8 @@ bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
// for the kinds of things that LLVM uses this for (merging adjacent stores
// of constants, etc.), WebAssembly implementations will either want the
// unaligned access or they'll split anyway.
- if (Fast) *Fast = true;
+ if (Fast)
+ *Fast = true;
return true;
}
@@ -438,6 +530,46 @@ EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
return TargetLowering::getSetCCResultType(DL, C, VT);
}
+bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::wasm_atomic_notify:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+    // The atomic.notify instruction does not really load the memory specified
+    // with this argument, but a MachineMemOperand must be either a load or a
+    // store, so we set this to a load.
+    // FIXME: Volatile isn't really correct, but currently all LLVM atomic
+    // instructions are treated as volatile in the backend, so we should be
+    // consistent. The same applies to the wasm_atomic_wait intrinsics too.
+ Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::wasm_atomic_wait_i32:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::wasm_atomic_wait_i64:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
+ return true;
+ default:
+ return false;
+ }
+}
+
//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//
@@ -465,8 +597,9 @@ static bool CallingConvSupported(CallingConv::ID CallConv) {
CallConv == CallingConv::CXX_FAST_TLS;
}
-SDValue WebAssemblyTargetLowering::LowerCall(
- CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const {
+SDValue
+WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc DL = CLI.DL;
SDValue Chain = CLI.Chain;
@@ -568,9 +701,9 @@ SDValue WebAssemblyTargetLowering::LowerCall(
FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
DAG.getConstant(Offset, DL, PtrVT));
- Chains.push_back(DAG.getStore(
- Chain, DL, Arg, Add,
- MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
+ Chains.push_back(
+ DAG.getStore(Chain, DL, Arg, Add,
+ MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
}
if (!Chains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
@@ -588,7 +721,8 @@ SDValue WebAssemblyTargetLowering::LowerCall(
Ops.append(OutVals.begin(),
IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
// Add a pointer to the vararg buffer.
- if (IsVarArg) Ops.push_back(FINode);
+ if (IsVarArg)
+ Ops.push_back(FINode);
SmallVector<EVT, 8> InTys;
for (const auto &In : Ins) {
@@ -682,11 +816,10 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
// Ignore In.getOrigAlign() because all our arguments are passed in
// registers.
- InVals.push_back(
- In.Used
- ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
- DAG.getTargetConstant(InVals.size(), DL, MVT::i32))
- : DAG.getUNDEF(In.VT));
+ InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
+ DAG.getTargetConstant(InVals.size(),
+ DL, MVT::i32))
+ : DAG.getUNDEF(In.VT));
// Record the number and types of arguments.
MFI->addParam(In.VT);
@@ -706,12 +839,18 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
MFI->addParam(PtrVT);
}
- // Record the number and types of results.
+ // Record the number and types of arguments and results.
SmallVector<MVT, 4> Params;
SmallVector<MVT, 4> Results;
- ComputeSignatureVTs(MF.getFunction(), DAG.getTarget(), Params, Results);
+ ComputeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
+ DAG.getTarget(), Params, Results);
for (MVT VT : Results)
MFI->addResult(VT);
+ // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
+ // the param logic here with ComputeSignatureVTs
+ assert(MFI->getParams().size() == Params.size() &&
+ std::equal(MFI->getParams().begin(), MFI->getParams().end(),
+ Params.begin()));
return Chain;
}
@@ -724,34 +863,47 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
switch (Op.getOpcode()) {
- default:
- llvm_unreachable("unimplemented operation lowering");
- return SDValue();
- case ISD::FrameIndex:
- return LowerFrameIndex(Op, DAG);
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG);
- case ISD::ExternalSymbol:
- return LowerExternalSymbol(Op, DAG);
- case ISD::JumpTable:
- return LowerJumpTable(Op, DAG);
- case ISD::BR_JT:
- return LowerBR_JT(Op, DAG);
- case ISD::VASTART:
- return LowerVASTART(Op, DAG);
- case ISD::BlockAddress:
- case ISD::BRIND:
- fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
- return SDValue();
- case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
- fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
- return SDValue();
- case ISD::FRAMEADDR:
- return LowerFRAMEADDR(Op, DAG);
- case ISD::CopyToReg:
- return LowerCopyToReg(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN:
- return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operation lowering");
+ return SDValue();
+ case ISD::FrameIndex:
+ return LowerFrameIndex(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::ExternalSymbol:
+ return LowerExternalSymbol(Op, DAG);
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT:
+ return LowerBR_JT(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG);
+ case ISD::BlockAddress:
+ case ISD::BRIND:
+ fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
+ return SDValue();
+ case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
+ fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
+ return SDValue();
+ case ISD::FRAMEADDR:
+ return LowerFRAMEADDR(Op, DAG);
+ case ISD::CopyToReg:
+ return LowerCopyToReg(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerAccessVectorElement(Op, DAG);
+ case ISD::INTRINSIC_VOID:
+ return LowerINTRINSIC_VOID(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG:
+ return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return LowerShift(Op, DAG);
}
}
@@ -763,21 +915,20 @@ SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
// the FI to some LEA-like instruction, but since we don't have that, we
// need to insert some kind of instruction that can take an FI operand and
// produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
- // copy_local between Op and its FI operand.
+ // local.copy between Op and its FI operand.
SDValue Chain = Op.getOperand(0);
SDLoc DL(Op);
unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
EVT VT = Src.getValueType();
- SDValue Copy(
- DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
- : WebAssembly::COPY_I64,
- DL, VT, Src),
- 0);
+ SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
+ : WebAssembly::COPY_I64,
+ DL, VT, Src),
+ 0);
return Op.getNode()->getNumValues() == 1
? DAG.getCopyToReg(Chain, DL, Reg, Copy)
- : DAG.getCopyToReg(Chain, DL, Reg, Copy, Op.getNumOperands() == 4
- ? Op.getOperand(3)
- : SDValue());
+ : DAG.getCopyToReg(Chain, DL, Reg, Copy,
+ Op.getNumOperands() == 4 ? Op.getOperand(3)
+ : SDValue());
}
return SDValue();
}
@@ -817,8 +968,9 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
}
-SDValue WebAssemblyTargetLowering::LowerExternalSymbol(
- SDValue Op, SelectionDAG &DAG) const {
+SDValue
+WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
const auto *ES = cast<ExternalSymbolSDNode>(Op);
EVT VT = Op.getValueType();
@@ -829,9 +981,10 @@ SDValue WebAssemblyTargetLowering::LowerExternalSymbol(
// we don't know anything about the symbol other than its name, because all
// external symbols used in target-independent SelectionDAG code are for
// functions.
- return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
- DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
- /*TargetFlags=*/0x1));
+ return DAG.getNode(
+ WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
+ WebAssemblyII::MO_SYMBOL_FUNCTION));
}
SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
@@ -860,7 +1013,8 @@ SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
// Add an operand for each case.
- for (auto MBB : MBBs) Ops.push_back(DAG.getBasicBlock(MBB));
+ for (auto MBB : MBBs)
+ Ops.push_back(DAG.getBasicBlock(MBB));
// TODO: For now, we just pick something arbitrary for a default case.
// We really want to sniff out the guard and put in the real default case (and
@@ -893,10 +1047,181 @@ WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
default:
return {}; // Don't custom lower most intrinsics.
- case Intrinsic::wasm_lsda:
- // TODO For now, just return 0 not to crash
- return DAG.getConstant(0, DL, Op.getValueType());
+ case Intrinsic::wasm_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT VT = Op.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ auto &Context = MF.getMMI().getContext();
+ MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
+ Twine(MF.getFunctionNumber()));
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getMCSymbol(S, PtrVT));
+ }
+ }
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ SDLoc DL(Op);
+
+ switch (IntNo) {
+ default:
+ return {}; // Don't custom lower most intrinsics.
+
+ case Intrinsic::wasm_throw: {
+ int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
+ switch (Tag) {
+ case CPP_EXCEPTION: {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ const char *SymName = MF.createExternalSymbolName("__cpp_exception");
+ SDValue SymNode =
+ DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+ DAG.getTargetExternalSymbol(
+ SymName, PtrVT, WebAssemblyII::MO_SYMBOL_EVENT));
+ return DAG.getNode(WebAssemblyISD::THROW, DL,
+ MVT::Other, // outchain type
+ {
+ Op.getOperand(0), // inchain
+ SymNode, // exception symbol
+ Op.getOperand(3) // thrown value
+ });
+ }
+ default:
+ llvm_unreachable("Invalid tag!");
+ }
+ break;
+ }
+ }
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ // If sign extension operations are disabled, allow sext_inreg only if operand
+ // is a vector extract. SIMD does not depend on sign extension operations, but
+ // allowing sext_inreg in this context lets us have simple patterns to select
+ // extract_lane_s instructions. Expanding sext_inreg everywhere would be
+ // simpler in this file, but would necessitate large and brittle patterns to
+ // undo the expansion and select extract_lane_s instructions.
+ assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
+ if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT)
+ return Op;
+ // Otherwise expand
+ return SDValue();
+}
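
As a side note, a standalone sketch (plain C++, not LLVM code; two's-complement narrowing assumed) of what sign_extend_inreg of an extracted lane computes, which is what the i8x16/i16x8 extract_lane_s instructions provide in a single step.

#include <cassert>
#include <cstdint>

// Sign-extend the low lane bits of a value that was extracted zero-extended.
int32_t signExtendLane(uint32_t V, unsigned LaneBits) {
  if (LaneBits == 8)
    return static_cast<int8_t>(V);  // keep low 8 bits, sign-extend
  if (LaneBits == 16)
    return static_cast<int16_t>(V); // keep low 16 bits, sign-extend
  return static_cast<int32_t>(V);
}

int main() {
  assert(signExtendLane(0xFFu, 8) == -1);        // 0xFF as an i8 lane is -1
  assert(signExtendLane(0x7Fu, 8) == 127);       // positive lanes are unchanged
  assert(signExtendLane(0x8000u, 16) == -32768); // 16-bit lane example
  return 0;
}
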
+
+SDValue
+WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
+ MVT VecType = Op.getOperand(0).getSimpleValueType();
+ assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
+ size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
+
+ // Space for two vector args and sixteen mask indices
+ SDValue Ops[18];
+ size_t OpIdx = 0;
+ Ops[OpIdx++] = Op.getOperand(0);
+ Ops[OpIdx++] = Op.getOperand(1);
+
+ // Expand mask indices to byte indices and materialize them as operands
+ for (size_t I = 0, Lanes = Mask.size(); I < Lanes; ++I) {
+ for (size_t J = 0; J < LaneBytes; ++J) {
+ // Lower undefs (represented by -1 in mask) to zero
+ uint64_t ByteIndex =
+ Mask[I] == -1 ? 0 : (uint64_t)Mask[I] * LaneBytes + J;
+ Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
+ }
+ }
+
+ return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
+}
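
A worked standalone example (plain C++, not LLVM code) of the mask expansion above: each lane index becomes LaneBytes consecutive byte indices, and an undef lane (-1) becomes zero bytes.

#include <cstdint>
#include <cstdio>
#include <vector>

// Expand per-lane shuffle indices into the per-byte selectors that the
// WebAssembly SHUFFLE node expects (16 of them for a 128-bit vector).
std::vector<uint32_t> expandMaskToBytes(const std::vector<int> &Mask,
                                        unsigned LaneBytes) {
  std::vector<uint32_t> Bytes;
  for (int Lane : Mask)
    for (unsigned J = 0; J < LaneBytes; ++J)
      Bytes.push_back(Lane == -1 ? 0
                                 : static_cast<uint32_t>(Lane) * LaneBytes + J);
  return Bytes;
}

int main() {
  // v4i32 shuffle mask {0, 5, -1, 7}; each i32 lane covers 4 bytes.
  for (uint32_t B : expandMaskToBytes({0, 5, -1, 7}, 4))
    std::printf("%u ", static_cast<unsigned>(B));
  std::printf("\n"); // prints: 0 1 2 3 20 21 22 23 0 0 0 0 28 29 30 31
  return 0;
}
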
+
+SDValue
+WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Allow constant lane indices, expand variable lane indices
+ SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
+ if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
+ return Op;
+ else
+ // Perform default expansion
+ return SDValue();
+}
+
+static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
+ EVT LaneT = Op.getSimpleValueType().getVectorElementType();
+ // 32-bit and 64-bit unrolled shifts will have proper semantics
+ if (LaneT.bitsGE(MVT::i32))
+ return DAG.UnrollVectorOp(Op.getNode());
+ // Otherwise mask the shift value to get proper semantics from 32-bit shift
+ SDLoc DL(Op);
+ SDValue ShiftVal = Op.getOperand(1);
+ uint64_t MaskVal = LaneT.getSizeInBits() - 1;
+ SDValue MaskedShiftVal = DAG.getNode(
+ ISD::AND, // mask opcode
+ DL, ShiftVal.getValueType(), // masked value type
+ ShiftVal, // original shift value operand
+ DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
+ );
+
+ return DAG.UnrollVectorOp(
+ DAG.getNode(Op.getOpcode(), // original shift opcode
+ DL, Op.getValueType(), // original return type
+ Op.getOperand(0), // original vector operand,
+ MaskedShiftVal // new masked shift value operand
+ )
+ .getNode());
+}
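
A quick standalone check (plain C++, assuming the wasm convention that per-lane shift counts are taken modulo the lane width) of why masking with LaneBits - 1 gives the widened 32-bit shift the intended per-lane behaviour.

#include <cassert>
#include <cstdint>

// After widening an 8-bit lane to 32 bits, mask the shift amount with
// (lane bits - 1) so the 32-bit shift matches the per-lane semantics.
uint32_t shlLane8(uint8_t Lane, uint32_t Amount) {
  uint32_t Masked = Amount & 7u; // LaneBits - 1 for an i8 lane
  return static_cast<uint8_t>(static_cast<uint32_t>(Lane) << Masked);
}

int main() {
  assert(shlLane8(0x01, 1) == 0x02); // in-range amounts are unaffected
  assert(shlLane8(0x01, 9) == 0x02); // 9 & 7 == 1; without the mask, the low
                                     // 8 bits of (1 << 9) would be 0 instead
  return 0;
}
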
+
+SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ // Only manually lower vector shifts
+ assert(Op.getSimpleValueType().isVector());
+
+ // Expand all vector shifts until V8 fixes its implementation
+ // TODO: remove this once V8 is fixed
+ if (!Subtarget->hasUnimplementedSIMD128())
+ return UnrollVectorShift(Op, DAG);
+
+ // Unroll non-splat vector shifts
+ BuildVectorSDNode *ShiftVec;
+ SDValue SplatVal;
+ if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
+ !(SplatVal = ShiftVec->getSplatValue()))
+ return UnrollVectorShift(Op, DAG);
+
+ // All splats except i64x2 const splats are handled by patterns
+ ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
+ if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
+ return Op;
+
+ // i64x2 const splats are custom lowered to avoid unnecessary wraps
+ unsigned Opcode;
+ switch (Op.getOpcode()) {
+ case ISD::SHL:
+ Opcode = WebAssemblyISD::VEC_SHL;
+ break;
+ case ISD::SRA:
+ Opcode = WebAssemblyISD::VEC_SHR_S;
+ break;
+ case ISD::SRL:
+ Opcode = WebAssemblyISD::VEC_SHR_U;
+ break;
+ default:
+ llvm_unreachable("unexpected opcode");
}
+ APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
+ return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
+ DAG.getConstant(Shift, DL, MVT::i32));
}
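
A tiny standalone illustration (plain C++, mirroring the effect of APInt::zextOrTrunc(32) on the splat) of the constant narrowing in the i64x2 path above: the shift count becomes a 32-bit immediate directly, rather than an i64 constant that would otherwise, presumably, need a wrap to i32 at run time.

#include <cassert>
#include <cstdint>

// Narrow the splatted i64 shift amount to a 32-bit immediate at compile time.
uint32_t narrowSplatShiftAmount(uint64_t Splat) {
  return static_cast<uint32_t>(Splat); // keep the low 32 bits
}

int main() {
  assert(narrowSplatShiftAmount(3) == 3);
  // Only the low 32 bits survive, which is harmless for any meaningful
  // 64-bit-lane shift amount (0..63).
  assert(narrowSplatShiftAmount(0x100000003ULL) == 3);
  return 0;
}
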
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 79819493ac6a..59f4230ed889 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -29,21 +29,22 @@ enum NodeType : unsigned {
#undef HANDLE_NODETYPE
};
-} // end namespace WebAssemblyISD
+} // end namespace WebAssemblyISD
class WebAssemblySubtarget;
class WebAssemblyTargetMachine;
class WebAssemblyTargetLowering final : public TargetLowering {
- public:
+public:
WebAssemblyTargetLowering(const TargetMachine &TM,
const WebAssemblySubtarget &STI);
- private:
+private:
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
/// right decision when generating code for different targets.
const WebAssemblySubtarget *Subtarget;
+ AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
@@ -52,9 +53,9 @@ class WebAssemblyTargetLowering final : public TargetLowering {
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
- std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI, StringRef Constraint,
- MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -66,6 +67,9 @@ class WebAssemblyTargetLowering final : public TargetLowering {
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
@@ -94,13 +98,18 @@ class WebAssemblyTargetLowering final : public TargetLowering {
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
};
namespace WebAssembly {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
-} // end namespace WebAssembly
+} // end namespace WebAssembly
-} // end namespace llvm
+} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index d879932b3232..5fb8ef90bc43 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -16,10 +16,16 @@
// Atomic loads
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
+multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> inst = -1> {
+ defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+ inst>,
+ Requires<[HasAtomics]>;
+}
+
defm ATOMIC_LOAD_I32 : WebAssemblyLoad<I32, "i32.atomic.load", 0xfe10>;
defm ATOMIC_LOAD_I64 : WebAssemblyLoad<I64, "i64.atomic.load", 0xfe11>;
-} // Defs = [ARGUMENTS]
// Select loads with no constant offset.
let Predicates = [HasAtomics] in {
@@ -54,13 +60,11 @@ def : LoadPatExternSymOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
// Extending loads. Note that there are only zero-extending atomic loads, no
// sign-extending loads.
-let Defs = [ARGUMENTS] in {
defm ATOMIC_LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load8_u", 0xfe12>;
defm ATOMIC_LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load16_u", 0xfe13>;
defm ATOMIC_LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load8_u", 0xfe14>;
defm ATOMIC_LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load16_u", 0xfe15>;
defm ATOMIC_LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load32_u", 0xfe16>;
-} // Defs = [ARGUMENTS]
// Fragments for extending loads. These are different from regular loads because
// the SDNodes are derived from AtomicSDNode rather than LoadSDNode and
@@ -110,7 +114,7 @@ def : LoadPatNoOffset<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatNoOffset<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatNoOffset<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatNoOffset<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-// 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i32
+// 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
// Zero-extending loads with constant offset
def : LoadPatImmOff<i32, zext_aload_8_32, regPlusImm, ATOMIC_LOAD8_U_I32>;
@@ -192,10 +196,8 @@ def : LoadPatExternSymOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
// Atomic stores
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
defm ATOMIC_STORE_I32 : WebAssemblyStore<I32, "i32.atomic.store", 0xfe17>;
defm ATOMIC_STORE_I64 : WebAssemblyStore<I64, "i64.atomic.store", 0xfe18>;
-} // Defs = [ARGUMENTS]
// We need an 'atomic' version of store patterns because store and atomic_store
// nodes have different operand orders:
@@ -255,13 +257,11 @@ def : AStorePatExternSymOffOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
} // Predicates = [HasAtomics]
// Truncating stores.
-let Defs = [ARGUMENTS] in {
defm ATOMIC_STORE8_I32 : WebAssemblyStore<I32, "i32.atomic.store8", 0xfe19>;
defm ATOMIC_STORE16_I32 : WebAssemblyStore<I32, "i32.atomic.store16", 0xfe1a>;
defm ATOMIC_STORE8_I64 : WebAssemblyStore<I64, "i64.atomic.store8", 0xfe1b>;
defm ATOMIC_STORE16_I64 : WebAssemblyStore<I64, "i64.atomic.store16", 0xfe1c>;
defm ATOMIC_STORE32_I64 : WebAssemblyStore<I64, "i64.atomic.store32", 0xfe1d>;
-} // Defs = [ARGUMENTS]
// Fragments for truncating stores.
@@ -333,8 +333,6 @@ def : AStorePatExternSymOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
// Atomic binary read-modify-writes
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
defm "" : I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
@@ -346,83 +344,82 @@ multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0xfe1e>;
defm ATOMIC_RMW_ADD_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.add", 0xfe1f>;
defm ATOMIC_RMW8_U_ADD_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.add", 0xfe20>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.add_u", 0xfe20>;
defm ATOMIC_RMW16_U_ADD_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.add", 0xfe21>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.add_u", 0xfe21>;
defm ATOMIC_RMW8_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.add", 0xfe22>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.add_u", 0xfe22>;
defm ATOMIC_RMW16_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.add", 0xfe23>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.add_u", 0xfe23>;
defm ATOMIC_RMW32_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.add", 0xfe24>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.add_u", 0xfe24>;
defm ATOMIC_RMW_SUB_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.sub", 0xfe25>;
defm ATOMIC_RMW_SUB_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.sub", 0xfe26>;
defm ATOMIC_RMW8_U_SUB_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.sub", 0xfe27>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.sub_u", 0xfe27>;
defm ATOMIC_RMW16_U_SUB_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.sub", 0xfe28>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.sub_u", 0xfe28>;
defm ATOMIC_RMW8_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.sub", 0xfe29>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.sub_u", 0xfe29>;
defm ATOMIC_RMW16_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.sub", 0xfe2a>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.sub_u", 0xfe2a>;
defm ATOMIC_RMW32_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.sub", 0xfe2b>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.sub_u", 0xfe2b>;
defm ATOMIC_RMW_AND_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.and", 0xfe2c>;
defm ATOMIC_RMW_AND_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.and", 0xfe2d>;
defm ATOMIC_RMW8_U_AND_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.and", 0xfe2e>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.and_u", 0xfe2e>;
defm ATOMIC_RMW16_U_AND_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.and", 0xfe2f>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.and_u", 0xfe2f>;
defm ATOMIC_RMW8_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.and", 0xfe30>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.and_u", 0xfe30>;
defm ATOMIC_RMW16_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.and", 0xfe31>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.and_u", 0xfe31>;
defm ATOMIC_RMW32_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.and", 0xfe32>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.and_u", 0xfe32>;
defm ATOMIC_RMW_OR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.or", 0xfe33>;
defm ATOMIC_RMW_OR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.or", 0xfe34>;
defm ATOMIC_RMW8_U_OR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.or", 0xfe35>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.or_u", 0xfe35>;
defm ATOMIC_RMW16_U_OR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.or", 0xfe36>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.or_u", 0xfe36>;
defm ATOMIC_RMW8_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.or", 0xfe37>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.or_u", 0xfe37>;
defm ATOMIC_RMW16_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.or", 0xfe38>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.or_u", 0xfe38>;
defm ATOMIC_RMW32_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.or", 0xfe39>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.or_u", 0xfe39>;
defm ATOMIC_RMW_XOR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.xor", 0xfe3a>;
defm ATOMIC_RMW_XOR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.xor", 0xfe3b>;
defm ATOMIC_RMW8_U_XOR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.xor", 0xfe3c>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xor_u", 0xfe3c>;
defm ATOMIC_RMW16_U_XOR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.xor", 0xfe3d>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xor_u", 0xfe3d>;
defm ATOMIC_RMW8_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.xor", 0xfe3e>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xor_u", 0xfe3e>;
defm ATOMIC_RMW16_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.xor", 0xfe3f>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xor_u", 0xfe3f>;
defm ATOMIC_RMW32_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.xor", 0xfe40>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xor_u", 0xfe40>;
defm ATOMIC_RMW_XCHG_I32 :
WebAssemblyBinRMW<I32, "i32.atomic.rmw.xchg", 0xfe41>;
defm ATOMIC_RMW_XCHG_I64 :
WebAssemblyBinRMW<I64, "i64.atomic.rmw.xchg", 0xfe42>;
defm ATOMIC_RMW8_U_XCHG_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.xchg", 0xfe43>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xchg_u", 0xfe43>;
defm ATOMIC_RMW16_U_XCHG_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.xchg", 0xfe44>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xchg_u", 0xfe44>;
defm ATOMIC_RMW8_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.xchg", 0xfe45>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xchg_u", 0xfe45>;
defm ATOMIC_RMW16_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.xchg", 0xfe46>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xchg_u", 0xfe46>;
defm ATOMIC_RMW32_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.xchg", 0xfe47>;
-}
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xchg_u", 0xfe47>;
// Select binary RMWs with no constant offset.
class BinRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
@@ -533,7 +530,7 @@ class sext_bin_rmw_8_64<PatFrag kind> :
PatFrag<(ops node:$addr, node:$val),
(anyext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>;
class sext_bin_rmw_16_64<PatFrag kind> : sext_bin_rmw_8_64<kind>;
-// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_s/i32
+// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_i32_s
// Patterns for various addressing modes for truncating-extending binary RMWs.
multiclass BinRMWTruncExtPattern<
@@ -655,3 +652,368 @@ defm : BinRMWTruncExtPattern<
ATOMIC_RMW8_U_XCHG_I32, ATOMIC_RMW16_U_XCHG_I32,
ATOMIC_RMW8_U_XCHG_I64, ATOMIC_RMW16_U_XCHG_I64, ATOMIC_RMW32_U_XCHG_I64>;
} // Predicates = [HasAtomics]
+
+//===----------------------------------------------------------------------===//
+// Atomic ternary read-modify-writes
+//===----------------------------------------------------------------------===//
+
+// TODO: LLVM IR's cmpxchg instruction returns a pair of {loaded value, success
+// flag}. When we use the success flag or both values, we can't make use of i64
+// truncate/extend versions of instructions for now, which is suboptimal.
+// Consider adding a pass after instruction selection that optimizes this case
+// if it is frequent.
+
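
For reference, a standalone sketch (plain C++, single-threaded model, not LLVM) of the {loaded value, success flag} result shape the TODO above refers to; needing the flag, or both values, is what blocks the narrow truncate/extend selection for now.

#include <cassert>
#include <cstdint>

struct CmpXchgResult {
  uint32_t Loaded; // value that was in memory before the operation
  bool Success;    // true iff the store of New happened
};

// Single-threaded model of a 32-bit compare-and-exchange.
CmpXchgResult cmpxchg32(uint32_t &Mem, uint32_t Expected, uint32_t New) {
  CmpXchgResult R{Mem, Mem == Expected};
  if (R.Success)
    Mem = New;
  return R;
}

int main() {
  uint32_t Cell = 5;
  CmpXchgResult R = cmpxchg32(Cell, 5, 9);
  assert(R.Success && R.Loaded == 5 && Cell == 9);  // exchange happened
  R = cmpxchg32(Cell, 5, 1);
  assert(!R.Success && R.Loaded == 9 && Cell == 9); // value unchanged
  return 0;
}
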
+multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
+ defm "" : I<(outs rc:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
+ rc:$new),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"),
+ !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+}
+
+defm ATOMIC_RMW_CMPXCHG_I32 :
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw.cmpxchg", 0xfe48>;
+defm ATOMIC_RMW_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw.cmpxchg", 0xfe49>;
+defm ATOMIC_RMW8_U_CMPXCHG_I32 :
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw8.cmpxchg_u", 0xfe4a>;
+defm ATOMIC_RMW16_U_CMPXCHG_I32 :
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw16.cmpxchg_u", 0xfe4b>;
+defm ATOMIC_RMW8_U_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw8.cmpxchg_u", 0xfe4c>;
+defm ATOMIC_RMW16_U_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw16.cmpxchg_u", 0xfe4d>;
+defm ATOMIC_RMW32_U_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw32.cmpxchg_u", 0xfe4e>;
+
+// Select ternary RMWs with no constant offset.
+class TerRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)),
+ (inst 0, 0, I32:$addr, ty:$exp, ty:$new)>;
+
+// Select ternary RMWs with a constant offset.
+
+// Pattern with address + immediate offset
+class TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
+ Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)),
+ (inst 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>;
+
+class TerRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+ ty:$exp, ty:$new)),
+ (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, ty:$new)>;
+
+class TerRMWPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+ ty:$exp, ty:$new)),
+ (inst 0, texternalsym:$off, I32:$addr, ty:$exp, ty:$new)>;
+
+// Select ternary RMWs with just a constant offset.
+class TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
+ (inst 0, imm:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+
+class TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+ (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+
+class TerRMWPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (WebAssemblywrapper texternalsym:$off), ty:$exp, ty:$new)),
+ (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+
+// Patterns for various addressing modes.
+multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
+ NI inst_64> {
+ def : TerRMWPatNoOffset<i32, rmw_32, inst_32>;
+ def : TerRMWPatNoOffset<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
+ def : TerRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
+ def : TerRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
+ def : TerRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
+
+ def : TerRMWPatGlobalAddr<i32, rmw_32, inst_32>;
+ def : TerRMWPatGlobalAddr<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatExternalSym<i32, rmw_32, inst_32>;
+ def : TerRMWPatExternalSym<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>;
+ def : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatExternSymOffOnly<i32, rmw_32, inst_32>;
+ def : TerRMWPatExternSymOffOnly<i64, rmw_64, inst_64>;
+}
+
+let Predicates = [HasAtomics] in {
+defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
+ ATOMIC_RMW_CMPXCHG_I32, ATOMIC_RMW_CMPXCHG_I64>;
+} // Predicates = [HasAtomics]
+
+// Truncating & zero-extending ternary RMW patterns.
+// DAG legalization & optimization before instruction selection may introduce
+// additional nodes such as anyext or assertzext depending on operand types.
+class zext_ter_rmw_8_32<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (and (i32 (kind node:$addr, node:$exp, node:$new)), 255)>;
+class zext_ter_rmw_16_32<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (and (i32 (kind node:$addr, node:$exp, node:$new)), 65535)>;
+class zext_ter_rmw_8_64<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (zext (i32 (assertzext (i32 (kind node:$addr,
+ (i32 (trunc (i64 node:$exp))),
+ (i32 (trunc (i64 node:$new))))))))>;
+class zext_ter_rmw_16_64<PatFrag kind> : zext_ter_rmw_8_64<kind>;
+class zext_ter_rmw_32_64<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (zext (i32 (kind node:$addr,
+ (i32 (trunc (i64 node:$exp))),
+ (i32 (trunc (i64 node:$new))))))>;
+
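
A quick standalone check (plain C++) of the identity the zext_ter_rmw_* fragments above rely on: masking with 255 or 65535 is the same as zero-extending the low 8 or 16 bits of the RMW result.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF;
  // and-with-255 == zext of the low 8 bits.
  assert((X & 255u) == static_cast<uint32_t>(static_cast<uint8_t>(X)));
  // and-with-65535 == zext of the low 16 bits.
  assert((X & 65535u) == static_cast<uint32_t>(static_cast<uint16_t>(X)));
  return 0;
}
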
+// Truncating & sign-extending ternary RMW patterns.
+// We match subword RMWs (for 32-bit) and anyext RMWs (for 64-bit) and select a
+// zext RMW; the next instruction will be sext_inreg which is selected by
+// itself.
+class sext_ter_rmw_8_32<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (kind node:$addr, node:$exp, node:$new)>;
+class sext_ter_rmw_16_32<PatFrag kind> : sext_ter_rmw_8_32<kind>;
+class sext_ter_rmw_8_64<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (anyext (i32 (assertzext (i32
+ (kind node:$addr,
+ (i32 (trunc (i64 node:$exp))),
+ (i32 (trunc (i64 node:$new))))))))>;
+class sext_ter_rmw_16_64<PatFrag kind> : sext_ter_rmw_8_64<kind>;
+// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_i32_s
+
+// Patterns for various addressing modes for truncating-extending ternary RMWs.
+multiclass TerRMWTruncExtPattern<
+ PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
+ NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> {
+ // Truncating-extending ternary RMWs with no constant offset
+ def : TerRMWPatNoOffset<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatNoOffset<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatNoOffset<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatNoOffset<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatNoOffset<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatNoOffset<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatNoOffset<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatNoOffset<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatNoOffset<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ // Truncating-extending ternary RMWs with a constant offset
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, regPlusImm, inst32_64>;
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
+
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+
+ def : TerRMWPatGlobalAddr<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddr<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatGlobalAddr<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddr<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ def : TerRMWPatExternalSym<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternalSym<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternalSym<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternalSym<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatExternalSym<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatExternalSym<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternalSym<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternalSym<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternalSym<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ // Truncating-extending ternary RMWs with just a constant offset
+ def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ def : TerRMWPatExternSymOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternSymOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatExternSymOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternSymOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+}
+
+let Predicates = [HasAtomics] in {
+defm : TerRMWTruncExtPattern<
+ atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
+ ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
+ ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
+ ATOMIC_RMW32_U_CMPXCHG_I64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic wait / notify
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1 in {
+defm ATOMIC_NOTIFY :
+ I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
+ "atomic.notify \t${off}, ${p2align}", 0xfe00>;
+let mayLoad = 1 in {
+defm ATOMIC_WAIT_I32 :
+ I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp, I64:$timeout),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "i32.atomic.wait \t${off}, ${p2align}", 0xfe01>;
+defm ATOMIC_WAIT_I64 :
+ I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp, I64:$timeout),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "i64.atomic.wait \t${off}, ${p2align}", 0xfe02>;
+} // mayLoad = 1
+} // hasSideEffects = 1
+
+let Predicates = [HasAtomics] in {
+// Select notifys with no constant offset.
+class NotifyPatNoOffset<Intrinsic kind> :
+ Pat<(i32 (kind I32:$addr, I32:$count)),
+ (ATOMIC_NOTIFY 0, 0, I32:$addr, I32:$count)>;
+def : NotifyPatNoOffset<int_wasm_atomic_notify>;
+
+// Select notifys with a constant offset.
+
+// Pattern with address + immediate offset
+class NotifyPatImmOff<Intrinsic kind, PatFrag operand> :
+ Pat<(i32 (kind (operand I32:$addr, imm:$off), I32:$count)),
+ (ATOMIC_NOTIFY 0, imm:$off, I32:$addr, I32:$count)>;
+def : NotifyPatImmOff<int_wasm_atomic_notify, regPlusImm>;
+def : NotifyPatImmOff<int_wasm_atomic_notify, or_is_add>;
+
+class NotifyPatGlobalAddr<Intrinsic kind> :
+ Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+ I32:$count)),
+ (ATOMIC_NOTIFY 0, tglobaladdr:$off, I32:$addr, I32:$count)>;
+def : NotifyPatGlobalAddr<int_wasm_atomic_notify>;
+
+class NotifyPatExternalSym<Intrinsic kind> :
+ Pat<(i32 (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+ I32:$count)),
+ (ATOMIC_NOTIFY 0, texternalsym:$off, I32:$addr, I32:$count)>;
+def : NotifyPatExternalSym<int_wasm_atomic_notify>;
+
+// Select notifys with just a constant offset.
+class NotifyPatOffsetOnly<Intrinsic kind> :
+ Pat<(i32 (kind imm:$off, I32:$count)),
+ (ATOMIC_NOTIFY 0, imm:$off, (CONST_I32 0), I32:$count)>;
+def : NotifyPatOffsetOnly<int_wasm_atomic_notify>;
+
+class NotifyPatGlobalAddrOffOnly<Intrinsic kind> :
+ Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), I32:$count)),
+ (ATOMIC_NOTIFY 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>;
+def : NotifyPatGlobalAddrOffOnly<int_wasm_atomic_notify>;
+
+class NotifyPatExternSymOffOnly<Intrinsic kind> :
+ Pat<(i32 (kind (WebAssemblywrapper texternalsym:$off), I32:$count)),
+ (ATOMIC_NOTIFY 0, texternalsym:$off, (CONST_I32 0), I32:$count)>;
+def : NotifyPatExternSymOffOnly<int_wasm_atomic_notify>;
+
+// Select waits with no constant offset.
+class WaitPatNoOffset<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
+ (inst 0, 0, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with a constant offset.
+
+// Pattern with address + immediate offset
+class WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, NI inst> :
+ Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
+ (inst 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddr<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+ ty:$exp, I64:$timeout)),
+ (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddr<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddr<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatExternalSym<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+ ty:$exp, I64:$timeout)),
+ (inst 0, texternalsym:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatExternalSym<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatExternalSym<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with just a constant offset.
+class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
+ (inst 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)),
+ (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatExternSymOffOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (WebAssemblywrapper texternalsym:$off), ty:$exp,
+ I64:$timeout)),
+ (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatExternSymOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatExternSymOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+} // Predicates = [HasAtomics]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 34262752430c..07839b790114 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -15,8 +15,6 @@
// TODO: addr64: These currently assume the callee address is 32-bit.
// FIXME: add $type to first call_indirect asmstr (and maybe $flags)
-let Defs = [ARGUMENTS] in {
-
// Call sequence markers. These have an immediate which represents the amount of
// stack space to allocate or free, which is used for varargs lowering.
let Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 in {
@@ -52,34 +50,35 @@ multiclass CALL<WebAssemblyRegClass vt, string prefix> {
}
multiclass SIMD_CALL<ValueType vt, string prefix> {
- defm CALL_#vt : SIMD_I<(outs V128:$dst), (ins function32_op:$callee,
- variable_ops),
- (outs), (ins function32_op:$callee),
- [(set (vt V128:$dst),
- (WebAssemblycall1 (i32 imm:$callee)))],
- !strconcat(prefix, "call\t$dst, $callee"),
- !strconcat(prefix, "call\t$callee"),
- 0x10>;
+
+ defm CALL_#vt : I<(outs V128:$dst), (ins function32_op:$callee, variable_ops),
+ (outs), (ins function32_op:$callee),
+ [(set (vt V128:$dst),
+ (WebAssemblycall1 (i32 imm:$callee)))],
+ !strconcat(prefix, "call\t$dst, $callee"),
+ !strconcat(prefix, "call\t$callee"),
+ 0x10>,
+ Requires<[HasSIMD128]>;
let isCodeGenOnly = 1 in {
- defm PCALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
- (ins I32:$callee, variable_ops),
- (outs), (ins I32:$callee),
- [(set (vt V128:$dst),
- (WebAssemblycall1 I32:$callee))],
- "PSEUDO CALL INDIRECT\t$callee",
- "PSEUDO CALL INDIRECT\t$callee">;
+ defm PCALL_INDIRECT_#vt : I<(outs V128:$dst),
+ (ins I32:$callee, variable_ops),
+ (outs), (ins I32:$callee),
+ [(set (vt V128:$dst),
+ (WebAssemblycall1 I32:$callee))],
+ "PSEUDO CALL INDIRECT\t$callee",
+ "PSEUDO CALL INDIRECT\t$callee">,
+ Requires<[HasSIMD128]>;
} // isCodeGenOnly = 1
- defm CALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
- (ins TypeIndex:$type, i32imm:$flags,
- variable_ops),
- (outs), (ins TypeIndex:$type, i32imm:$flags),
- [],
- !strconcat(prefix,
- "call_indirect\t$dst"),
- !strconcat(prefix, "call_indirect\t$type"),
- 0x11>;
+ defm CALL_INDIRECT_#vt : I<(outs V128:$dst),
+ (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+ (outs), (ins TypeIndex:$type, i32imm:$flags),
+ [],
+ !strconcat(prefix, "call_indirect\t$dst"),
+ !strconcat(prefix, "call_indirect\t$type"),
+ 0x11>,
+ Requires<[HasSIMD128]>;
}
let Uses = [SP32, SP64], isCall = 1 in {
@@ -88,10 +87,12 @@ let Uses = [SP32, SP64], isCall = 1 in {
defm "" : CALL<F32, "f32.">;
defm "" : CALL<F64, "f64.">;
defm "" : CALL<EXCEPT_REF, "except_ref.">;
- defm "" : SIMD_CALL<v16i8, "i8x16.">;
- defm "" : SIMD_CALL<v8i16, "i16x8.">;
- defm "" : SIMD_CALL<v4i32, "i32x4.">;
- defm "" : SIMD_CALL<v4f32, "f32x4.">;
+ defm "" : SIMD_CALL<v16i8, "v128.">;
+ defm "" : SIMD_CALL<v8i16, "v128.">;
+ defm "" : SIMD_CALL<v4i32, "v128.">;
+ defm "" : SIMD_CALL<v2i64, "v128.">;
+ defm "" : SIMD_CALL<v4f32, "v128.">;
+ defm "" : SIMD_CALL<v2f64, "v128.">;
defm CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops),
(outs), (ins function32_op:$callee),
@@ -115,8 +116,6 @@ let Uses = [SP32, SP64], isCall = 1 in {
0x11>;
} // Uses = [SP32,SP64], isCall = 1
-} // Defs = [ARGUMENTS]
-
// Patterns for matching a direct call to a global address.
def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_I32 tglobaladdr:$callee)>;
@@ -132,8 +131,12 @@ def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v8i16 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v4i32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_v2i64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_v2f64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(ExceptRef
(WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_EXCEPT_REF tglobaladdr:$callee)>;
@@ -155,8 +158,12 @@ def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v8i16 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v4i32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_v2i64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_v2f64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(ExceptRef
(WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_EXCEPT_REF texternalsym:$callee)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index d90244b90662..7eb6cbf4d249 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -12,8 +12,6 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
// The condition operand is a boolean value which WebAssembly represents as i32.
defm BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond),
@@ -30,47 +28,37 @@ defm BR : NRI<(outs), (ins bb_op:$dst),
} // isBarrier = 1
} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
-} // Defs = [ARGUMENTS]
-
def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst),
(BR_IF bb_op:$dst, I32:$cond)>;
def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst),
(BR_UNLESS bb_op:$dst, I32:$cond)>;
-let Defs = [ARGUMENTS] in {
+// A list of branch targets enclosed in {} and separated by commas.
+// Used by br_table only.
+def BrListAsmOperand : AsmOperandClass { let Name = "BrList"; }
+let OperandNamespace = "WebAssembly" in {
+let OperandType = "OPERAND_BRLIST" in {
+def brlist : Operand<i32> {
+ let ParserMatchClass = BrListAsmOperand;
+ let PrintMethod = "printBrList";
+}
+} // OPERAND_BRLIST
+} // OperandNamespace = "WebAssembly"
// TODO: SelectionDAG's lowering insists on using a pointer as the index for
// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
// currently.
-// Set TSFlags{0} to 1 to indicate that the variable_ops are immediates.
-// Set TSFlags{1} to 1 to indicate that the immediates represent labels.
-// FIXME: this can't inherit from I<> since there is no way to inherit from a
-// multiclass and still have the let statements.
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-def BR_TABLE_I32 : NI<(outs), (ins I32:$index, variable_ops),
- [(WebAssemblybr_table I32:$index)], 0,
- "br_table \t$index", 0x0e> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
-def BR_TABLE_I32_S : NI<(outs), (ins I32:$index),
- [], 1,
- "br_table \t$index", 0x0e> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
-def BR_TABLE_I64 : NI<(outs), (ins I64:$index, variable_ops),
- [(WebAssemblybr_table I64:$index)], 0,
- "br_table \t$index"> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
-def BR_TABLE_I64_S : NI<(outs), (ins I64:$index),
- [], 1,
- "br_table \t$index"> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
+defm BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops),
+ (outs), (ins brlist:$brl),
+ [(WebAssemblybr_table I32:$index)],
+ "br_table \t$index", "br_table \t$brl",
+ 0x0e>;
+defm BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
+ (outs), (ins brlist:$brl),
+ [(WebAssemblybr_table I64:$index)],
+ "br_table \t$index", "br_table \t$brl",
+ 0x0e>;
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
// This is technically a control-flow instruction, since all it affects is the
@@ -81,13 +69,19 @@ defm NOP : NRI<(outs), (ins), [], "nop", 0x01>;
// These use/clobber VALUE_STACK to prevent them from being moved into the
// middle of an expression tree.
let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
-defm BLOCK : NRI<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
-defm LOOP : NRI<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
+defm BLOCK : NRI<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
+defm LOOP : NRI<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
+
+defm IF : I<(outs), (ins Signature:$sig, I32:$cond),
+ (outs), (ins Signature:$sig),
+ [], "if \t$sig, $cond", "if \t$sig", 0x04>;
+defm ELSE : NRI<(outs), (ins), [], "else", 0x05>;
-// END_BLOCK, END_LOOP, and END_FUNCTION are represented with the same opcode in
-// wasm.
+// END_BLOCK, END_LOOP, END_IF and END_FUNCTION are represented with the same
+// opcode in wasm.
defm END_BLOCK : NRI<(outs), (ins), [], "end_block", 0x0b>;
defm END_LOOP : NRI<(outs), (ins), [], "end_loop", 0x0b>;
+defm END_IF : NRI<(outs), (ins), [], "end_if", 0x0b>;
let isTerminator = 1, isBarrier = 1 in
defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
@@ -103,14 +97,16 @@ multiclass RETURN<WebAssemblyRegClass vt> {
}
multiclass SIMD_RETURN<ValueType vt> {
- defm RETURN_#vt : SIMD_I<(outs), (ins V128:$val), (outs), (ins),
- [(WebAssemblyreturn (vt V128:$val))],
- "return \t$val", "return", 0x0f>;
+ defm RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
+ [(WebAssemblyreturn (vt V128:$val))],
+ "return \t$val", "return", 0x0f>,
+ Requires<[HasSIMD128]>;
// Equivalent to RETURN_#vt, for use at the end of a function when wasm
// semantics return by falling off the end of the block.
let isCodeGenOnly = 1 in
- defm FALLTHROUGH_RETURN_#vt : SIMD_I<(outs), (ins V128:$val), (outs), (ins),
- []>;
+ defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
+ []>,
+ Requires<[HasSIMD128]>;
}
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
@@ -124,7 +120,9 @@ let isReturn = 1 in {
defm "": SIMD_RETURN<v16i8>;
defm "": SIMD_RETURN<v8i16>;
defm "": SIMD_RETURN<v4i32>;
+ defm "": SIMD_RETURN<v2i64>;
defm "": SIMD_RETURN<v4f32>;
+ defm "": SIMD_RETURN<v2f64>;
defm RETURN_VOID : NRI<(outs), (ins), [(WebAssemblyreturn)], "return", 0x0f>;
@@ -144,14 +142,16 @@ let Predicates = [HasExceptionHandling] in {
// Throwing an exception: throw / rethrow
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-defm THROW_I32 : I<(outs), (ins i32imm:$tag, I32:$val),
- (outs), (ins i32imm:$tag),
- [(int_wasm_throw imm:$tag, I32:$val)],
+defm THROW_I32 : I<(outs), (ins event_op:$tag, I32:$val),
+ (outs), (ins event_op:$tag),
+ [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag),
+ I32:$val)],
"throw \t$tag, $val", "throw \t$tag",
0x08>;
-defm THROW_I64 : I<(outs), (ins i32imm:$tag, I64:$val),
- (outs), (ins i32imm:$tag),
- [(int_wasm_throw imm:$tag, I64:$val)],
+defm THROW_I64 : I<(outs), (ins event_op:$tag, I64:$val),
+ (outs), (ins event_op:$tag),
+ [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag),
+ I64:$val)],
"throw \t$tag, $val", "throw \t$tag",
0x08>;
defm RETHROW : NRI<(outs), (ins bb_op:$dst), [], "rethrow \t$dst", 0x09>;
@@ -168,7 +168,7 @@ defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
// Catching an exception: catch / catch_all
-let hasCtrlDep = 1 in {
+let hasCtrlDep = 1, hasSideEffects = 1 in {
defm CATCH_I32 : I<(outs I32:$dst), (ins i32imm:$tag),
(outs), (ins i32imm:$tag),
[(set I32:$dst, (int_wasm_catch imm:$tag))],
@@ -181,14 +181,10 @@ defm CATCH_ALL : NRI<(outs), (ins), [], "catch_all", 0x05>;
}
// Pseudo instructions: cleanupret / catchret
-// They are not return instructions in wasm, but setting 'isReturn' to true as
-// in X86 is necessary for computing EH scope membership.
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
- isCodeGenOnly = 1, isReturn = 1 in {
+ isCodeGenOnly = 1, isEHScopeReturn = 1 in {
defm CLEANUPRET : NRI<(outs), (ins), [(cleanupret)], "", 0>;
defm CATCHRET : NRI<(outs), (ins bb_op:$dst, bb_op:$from),
[(catchret bb:$dst, bb:$from)], "", 0>;
}
}
-
-} // Defs = [ARGUMENTS]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index c89c1b549816..e128656a142c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -13,19 +13,17 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
defm I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src), (outs), (ins),
[(set I32:$dst, (trunc I64:$src))],
- "i32.wrap/i64\t$dst, $src", "i32.wrap/i64", 0xa7>;
+ "i32.wrap_i64\t$dst, $src", "i32.wrap_i64", 0xa7>;
defm I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src), (outs), (ins),
[(set I64:$dst, (sext I32:$src))],
- "i64.extend_s/i32\t$dst, $src", "i64.extend_s/i32",
+ "i64.extend_i32_s\t$dst, $src", "i64.extend_i32_s",
0xac>;
defm I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src), (outs), (ins),
[(set I64:$dst, (zext I32:$src))],
- "i64.extend_u/i32\t$dst, $src", "i64.extend_u/i32",
+ "i64.extend_i32_u\t$dst, $src", "i64.extend_i32_u",
0xad>;
let Predicates = [HasSignExt] in {
@@ -51,58 +49,72 @@ defm I64_EXTEND32_S_I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins),
0xc4>;
} // Predicates = [HasSignExt]
-} // defs = [ARGUMENTS]
-
// Expand a "don't care" extend into zero-extend (chosen over sign-extend
// somewhat arbitrarily, although it favors popular hardware architectures
// and is conceptually a simpler operation).
def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>;
-let Defs = [ARGUMENTS] in {
-
// Conversion from floating point to integer instructions which don't trap on
// overflow or invalid.
defm I32_TRUNC_S_SAT_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
[(set I32:$dst, (fp_to_sint F32:$src))],
- "i32.trunc_s:sat/f32\t$dst, $src",
- "i32.trunc_s:sat/f32", 0xfc00>,
+ "i32.trunc_sat_f32_s\t$dst, $src",
+ "i32.trunc_sat_f32_s", 0xfc00>,
Requires<[HasNontrappingFPToInt]>;
defm I32_TRUNC_U_SAT_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
[(set I32:$dst, (fp_to_uint F32:$src))],
- "i32.trunc_u:sat/f32\t$dst, $src",
- "i32.trunc_u:sat/f32", 0xfc01>,
+ "i32.trunc_sat_f32_u\t$dst, $src",
+ "i32.trunc_sat_f32_u", 0xfc01>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_S_SAT_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
[(set I64:$dst, (fp_to_sint F32:$src))],
- "i64.trunc_s:sat/f32\t$dst, $src",
- "i64.trunc_s:sat/f32", 0xfc04>,
+ "i64.trunc_sat_f32_s\t$dst, $src",
+ "i64.trunc_sat_f32_s", 0xfc04>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_U_SAT_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
[(set I64:$dst, (fp_to_uint F32:$src))],
- "i64.trunc_u:sat/f32\t$dst, $src",
- "i64.trunc_u:sat/f32", 0xfc05>,
+ "i64.trunc_sat_f32_u\t$dst, $src",
+ "i64.trunc_sat_f32_u", 0xfc05>,
Requires<[HasNontrappingFPToInt]>;
defm I32_TRUNC_S_SAT_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
[(set I32:$dst, (fp_to_sint F64:$src))],
- "i32.trunc_s:sat/f64\t$dst, $src",
- "i32.trunc_s:sat/f64", 0xfc02>,
+ "i32.trunc_sat_f64_s\t$dst, $src",
+ "i32.trunc_sat_f64_s", 0xfc02>,
Requires<[HasNontrappingFPToInt]>;
defm I32_TRUNC_U_SAT_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
[(set I32:$dst, (fp_to_uint F64:$src))],
- "i32.trunc_u:sat/f64\t$dst, $src",
- "i32.trunc_u:sat/f64", 0xfc03>,
+ "i32.trunc_sat_f64_u\t$dst, $src",
+ "i32.trunc_sat_f64_u", 0xfc03>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_S_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
[(set I64:$dst, (fp_to_sint F64:$src))],
- "i64.trunc_s:sat/f64\t$dst, $src",
- "i64.trunc_s:sat/f64", 0xfc06>,
+ "i64.trunc_sat_f64_s\t$dst, $src",
+ "i64.trunc_sat_f64_s", 0xfc06>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_U_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
[(set I64:$dst, (fp_to_uint F64:$src))],
- "i64.trunc_u:sat/f64\t$dst, $src",
- "i64.trunc_u:sat/f64", 0xfc07>,
+ "i64.trunc_sat_f64_u\t$dst, $src",
+ "i64.trunc_sat_f64_u", 0xfc07>,
Requires<[HasNontrappingFPToInt]>;
+// Lower llvm.wasm.trunc.saturate.* to saturating instructions
+def : Pat<(int_wasm_trunc_saturate_signed F32:$src),
+ (I32_TRUNC_S_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src),
+ (I32_TRUNC_U_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_signed F64:$src),
+ (I32_TRUNC_S_SAT_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src),
+ (I32_TRUNC_U_SAT_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_saturate_signed F32:$src),
+ (I64_TRUNC_S_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src),
+ (I64_TRUNC_U_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_signed F64:$src),
+ (I64_TRUNC_S_SAT_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src),
+ (I64_TRUNC_U_SAT_F64 F64:$src)>;
+
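The *_SAT_* definitions and the llvm.wasm.trunc.saturate.* patterns above cover wasm's non-trapping float-to-int conversions. As a rough illustration of the semantics they provide (truncate toward zero, clamp on overflow, NaN becomes 0), here is a standalone C++ model of i32.trunc_sat_f32_s; the helper name is invented and the snippet is not part of the change:

#include <cmath>
#include <cstdint>
#include <limits>

// Emulates wasm i32.trunc_sat_f32_s: truncate toward zero, saturate to the
// i32 range on overflow, and map NaN to 0 instead of trapping.
int32_t truncSatF32S(float x) {
  if (std::isnan(x))
    return 0;
  if (x <= static_cast<float>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();
  if (x >= static_cast<float>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(std::trunc(x));
}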
// Conversion from floating point to integer pseudo-instructions which don't
// trap on overflow or invalid.
let usesCustomInserter = 1, isCodeGenOnly = 1 in {
@@ -135,88 +147,86 @@ defm FP_TO_UINT_I64_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
// Conversion from floating point to integer traps on overflow and invalid.
let hasSideEffects = 1 in {
defm I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
- [], "i32.trunc_s/f32\t$dst, $src", "i32.trunc_s/f32",
+ [], "i32.trunc_f32_s\t$dst, $src", "i32.trunc_f32_s",
0xa8>;
defm I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
- [], "i32.trunc_u/f32\t$dst, $src", "i32.trunc_u/f32",
+ [], "i32.trunc_f32_u\t$dst, $src", "i32.trunc_f32_u",
0xa9>;
defm I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
- [], "i64.trunc_s/f32\t$dst, $src", "i64.trunc_s/f32",
+ [], "i64.trunc_f32_s\t$dst, $src", "i64.trunc_f32_s",
0xae>;
defm I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
- [], "i64.trunc_u/f32\t$dst, $src", "i64.trunc_u/f32",
+ [], "i64.trunc_f32_u\t$dst, $src", "i64.trunc_f32_u",
0xaf>;
defm I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
- [], "i32.trunc_s/f64\t$dst, $src", "i32.trunc_s/f64",
+ [], "i32.trunc_f64_s\t$dst, $src", "i32.trunc_f64_s",
0xaa>;
defm I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
- [], "i32.trunc_u/f64\t$dst, $src", "i32.trunc_u/f64",
+ [], "i32.trunc_f64_u\t$dst, $src", "i32.trunc_f64_u",
0xab>;
defm I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
- [], "i64.trunc_s/f64\t$dst, $src", "i64.trunc_s/f64",
+ [], "i64.trunc_f64_s\t$dst, $src", "i64.trunc_f64_s",
0xb0>;
defm I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
- [], "i64.trunc_u/f64\t$dst, $src", "i64.trunc_u/f64",
+ [], "i64.trunc_f64_u\t$dst, $src", "i64.trunc_f64_u",
0xb1>;
} // hasSideEffects = 1
defm F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins),
[(set F32:$dst, (sint_to_fp I32:$src))],
- "f32.convert_s/i32\t$dst, $src", "f32.convert_s/i32",
+ "f32.convert_i32_s\t$dst, $src", "f32.convert_i32_s",
0xb2>;
defm F32_CONVERT_U_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins),
[(set F32:$dst, (uint_to_fp I32:$src))],
- "f32.convert_u/i32\t$dst, $src", "f32.convert_u/i32",
+ "f32.convert_i32_u\t$dst, $src", "f32.convert_i32_u",
0xb3>;
defm F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src), (outs), (ins),
[(set F64:$dst, (sint_to_fp I32:$src))],
- "f64.convert_s/i32\t$dst, $src", "f64.convert_s/i32",
+ "f64.convert_i32_s\t$dst, $src", "f64.convert_i32_s",
0xb7>;
defm F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src), (outs), (ins),
[(set F64:$dst, (uint_to_fp I32:$src))],
- "f64.convert_u/i32\t$dst, $src", "f64.convert_u/i32",
+ "f64.convert_i32_u\t$dst, $src", "f64.convert_i32_u",
0xb8>;
defm F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src), (outs), (ins),
[(set F32:$dst, (sint_to_fp I64:$src))],
- "f32.convert_s/i64\t$dst, $src", "f32.convert_s/i64",
+ "f32.convert_i64_s\t$dst, $src", "f32.convert_i64_s",
0xb4>;
defm F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src), (outs), (ins),
[(set F32:$dst, (uint_to_fp I64:$src))],
- "f32.convert_u/i64\t$dst, $src", "f32.convert_u/i64",
+ "f32.convert_i64_u\t$dst, $src", "f32.convert_i64_u",
0xb5>;
defm F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins),
[(set F64:$dst, (sint_to_fp I64:$src))],
- "f64.convert_s/i64\t$dst, $src", "f64.convert_s/i64",
+ "f64.convert_i64_s\t$dst, $src", "f64.convert_i64_s",
0xb9>;
defm F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins),
[(set F64:$dst, (uint_to_fp I64:$src))],
- "f64.convert_u/i64\t$dst, $src", "f64.convert_u/i64",
+ "f64.convert_i64_u\t$dst, $src", "f64.convert_i64_u",
0xba>;
defm F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src), (outs), (ins),
[(set F64:$dst, (fpextend F32:$src))],
- "f64.promote/f32\t$dst, $src", "f64.promote/f32",
+ "f64.promote_f32\t$dst, $src", "f64.promote_f32",
0xbb>;
defm F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src), (outs), (ins),
[(set F32:$dst, (fpround F64:$src))],
- "f32.demote/f64\t$dst, $src", "f32.demote/f64",
+ "f32.demote_f64\t$dst, $src", "f32.demote_f64",
0xb6>;
defm I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
[(set I32:$dst, (bitconvert F32:$src))],
- "i32.reinterpret/f32\t$dst, $src",
- "i32.reinterpret/f32", 0xbc>;
+ "i32.reinterpret_f32\t$dst, $src",
+ "i32.reinterpret_f32", 0xbc>;
defm F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins),
[(set F32:$dst, (bitconvert I32:$src))],
- "f32.reinterpret/i32\t$dst, $src",
- "f32.reinterpret/i32", 0xbe>;
+ "f32.reinterpret_i32\t$dst, $src",
+ "f32.reinterpret_i32", 0xbe>;
defm I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
[(set I64:$dst, (bitconvert F64:$src))],
- "i64.reinterpret/f64\t$dst, $src",
- "i64.reinterpret/f64", 0xbd>;
+ "i64.reinterpret_f64\t$dst, $src",
+ "i64.reinterpret_f64", 0xbd>;
defm F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins),
[(set F64:$dst, (bitconvert I64:$src))],
- "f64.reinterpret/i64\t$dst, $src",
- "f64.reinterpret/i64", 0xbf>;
-
-} // Defs = [ARGUMENTS]
+ "f64.reinterpret_i64\t$dst, $src",
+ "f64.reinterpret_i64", 0xbf>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
index 41b39f69e51c..a251d60b89ee 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
@@ -12,8 +12,6 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
defm SELECT_EXCEPT_REF : I<(outs EXCEPT_REF:$dst),
(ins EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond),
(outs), (ins),
@@ -23,8 +21,6 @@ defm SELECT_EXCEPT_REF : I<(outs EXCEPT_REF:$dst),
"except_ref.select\t$dst, $lhs, $rhs, $cond",
"except_ref.select", 0x1b>;
-} // Defs = [ARGUMENTS]
-
def : Pat<(select (i32 (setne I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
(SELECT_EXCEPT_REF EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 8db75d38942b..c5290f00b431 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -12,7 +12,38 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
+multiclass UnaryFP<SDNode node, string name, bits<32> f32Inst,
+ bits<32> f64Inst> {
+ defm _F32 : I<(outs F32:$dst), (ins F32:$src), (outs), (ins),
+ [(set F32:$dst, (node F32:$src))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("f32.", name), f32Inst>;
+ defm _F64 : I<(outs F64:$dst), (ins F64:$src), (outs), (ins),
+ [(set F64:$dst, (node F64:$src))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("f64.", name), f64Inst>;
+}
+multiclass BinaryFP<SDNode node, string name, bits<32> f32Inst,
+ bits<32> f64Inst> {
+ defm _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
+ [(set F32:$dst, (node F32:$lhs, F32:$rhs))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f32.", name), f32Inst>;
+ defm _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
+ [(set F64:$dst, (node F64:$lhs, F64:$rhs))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f64.", name), f64Inst>;
+}
+multiclass ComparisonFP<CondCode cond, string name, bits<32> f32Inst, bits<32> f64Inst> {
+ defm _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f32.", name), f32Inst>;
+ defm _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f64.", name), f64Inst>;
+}
let isCommutable = 1 in
defm ADD : BinaryFP<fadd, "add ", 0x92, 0xa0>;
@@ -27,8 +58,8 @@ defm NEG : UnaryFP<fneg, "neg ", 0x8c, 0x9a>;
defm COPYSIGN : BinaryFP<fcopysign, "copysign", 0x98, 0xa6>;
let isCommutable = 1 in {
-defm MIN : BinaryFP<fminnan, "min ", 0x96, 0xa4>;
-defm MAX : BinaryFP<fmaxnan, "max ", 0x97, 0xa5>;
+defm MIN : BinaryFP<fminimum, "min ", 0x96, 0xa4>;
+defm MAX : BinaryFP<fmaximum, "max ", 0x97, 0xa5>;
} // isCommutable = 1
defm CEIL : UnaryFP<fceil, "ceil", 0x8d, 0x9b>;
@@ -36,8 +67,6 @@ defm FLOOR : UnaryFP<ffloor, "floor", 0x8e, 0x9c>;
defm TRUNC : UnaryFP<ftrunc, "trunc", 0x8f, 0x9d>;
defm NEAREST : UnaryFP<fnearbyint, "nearest", 0x90, 0x9e>;
-} // Defs = [ARGUMENTS]
-
// DAGCombine oddly folds casts into the rhs of copysign. Unfold them.
def : Pat<(fcopysign F64:$lhs, F32:$rhs),
(COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>;
@@ -48,8 +77,6 @@ def : Pat<(fcopysign F32:$lhs, F64:$rhs),
def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>;
def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
-let Defs = [ARGUMENTS] in {
-
let isCommutable = 1 in {
defm EQ : ComparisonFP<SETOEQ, "eq ", 0x5b, 0x61>;
defm NE : ComparisonFP<SETUNE, "ne ", 0x5c, 0x62>;
@@ -59,8 +86,6 @@ defm LE : ComparisonFP<SETOLE, "le ", 0x5f, 0x65>;
defm GT : ComparisonFP<SETOGT, "gt ", 0x5e, 0x64>;
defm GE : ComparisonFP<SETOGE, "ge ", 0x60, 0x66>;
-} // Defs = [ARGUMENTS]
-
// Don't care floating-point comparisons, supported via other comparisons.
def : Pat<(seteq f32:$lhs, f32:$rhs), (EQ_F32 f32:$lhs, f32:$rhs)>;
def : Pat<(setne f32:$lhs, f32:$rhs), (NE_F32 f32:$lhs, f32:$rhs)>;
@@ -75,8 +100,6 @@ def : Pat<(setle f64:$lhs, f64:$rhs), (LE_F64 f64:$lhs, f64:$rhs)>;
def : Pat<(setgt f64:$lhs, f64:$rhs), (GT_F64 f64:$lhs, f64:$rhs)>;
def : Pat<(setge f64:$lhs, f64:$rhs), (GE_F64 f64:$lhs, f64:$rhs)>;
-let Defs = [ARGUMENTS] in {
-
defm SELECT_F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs, I32:$cond),
(outs), (ins),
[(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))],
@@ -86,8 +109,6 @@ defm SELECT_F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs, I32:$cond),
[(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))],
"f64.select\t$dst, $lhs, $rhs, $cond", "f64.select", 0x1b>;
-} // Defs = [ARGUMENTS]
-
// ISD::SELECT requires its operand to conform to getBooleanContents, but
// WebAssembly's select interprets any non-zero value as true, so we can fold
// a setne with 0 into a select.
@@ -101,3 +122,10 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs),
(SELECT_F32 F32:$rhs, F32:$lhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs),
(SELECT_F64 F64:$rhs, F64:$lhs, I32:$cond)>;
+
+// The legalizer inserts an unnecessary `and 1` to make input conform
+// to getBooleanContents, which we can lower away.
+def : Pat<(select (i32 (and I32:$cond, 1)), F32:$lhs, F32:$rhs),
+ (SELECT_F32 F32:$lhs, F32:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (and I32:$cond, 1)), F64:$lhs, F64:$rhs),
+ (SELECT_F64 F64:$lhs, F64:$rhs, I32:$cond)>;
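These extra patterns work because ISD::SELECT's condition has already been normalized to 0 or 1, while wasm's select treats any non-zero value as true, so the setne-with-0 and and-with-1 wrappers choose the same operand as the raw condition. A standalone C++ sketch of that equivalence (illustrative names, not LLVM code):

#include <cstdint>

// wasm select: picks lhs when cond is non-zero, rhs when it is zero.
inline double wasmSelect(double lhs, double rhs, uint32_t cond) {
  return cond != 0 ? lhs : rhs;
}

// select(cond != 0, a, b) equals wasmSelect(a, b, cond) for every cond;
// select(cond & 1, a, b) equals it whenever cond is already 0 or 1, which is
// exactly what the legalizer guarantees when it inserts the `and 1`.
double selectViaSetne(double lhs, double rhs, uint32_t cond) {
  return wasmSelect(lhs, rhs, cond != 0 ? 1u : 0u);
}
double selectViaAnd1(double lhs, double rhs, uint32_t cond) {
  return wasmSelect(lhs, rhs, cond & 1u);
}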
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 403152c80660..15a9714a55a1 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -15,21 +15,24 @@
// WebAssembly Instruction Format.
// We instantiate 2 of these for every actual instruction (register based
// and stack based), see below.
-class WebAssemblyInst<bits<32> inst, string asmstr, bit stack> : Instruction {
- field bits<32> Inst = inst; // Instruction encoding.
- field bit StackBased = stack;
+class WebAssemblyInst<bits<32> inst, string asmstr, string stack> : StackRel,
+ Instruction {
+ bits<32> Inst = inst; // Instruction encoding.
+ string StackBased = stack;
+ string BaseName = NAME;
let Namespace = "WebAssembly";
let Pattern = [];
let AsmString = asmstr;
}
// Normal instructions. Default instantiation of a WebAssemblyInst.
-class NI<dag oops, dag iops, list<dag> pattern, bit stack, string asmstr = "",
- bits<32> inst = -1>
+class NI<dag oops, dag iops, list<dag> pattern, string stack,
+ string asmstr = "", bits<32> inst = -1>
: WebAssemblyInst<inst, asmstr, stack> {
dag OutOperandList = oops;
dag InOperandList = iops;
let Pattern = pattern;
+ let Defs = [ARGUMENTS];
}
// Generates both register and stack based versions of one actual instruction.
@@ -37,10 +40,10 @@ class NI<dag oops, dag iops, list<dag> pattern, bit stack, string asmstr = "",
// based version of this instruction, as well as the corresponding asmstr.
// The register versions have virtual-register operands which correspond to wasm
// locals or stack locations. Each use and def of the register corresponds to an
-// implicit get_local / set_local or access of stack operands in wasm. These
+// implicit local.get / local.set or access of stack operands in wasm. These
// instructions are used for ISel and all MI passes. The stack versions of the
// instructions do not have register operands (they implicitly operate on the
-// stack), and get_locals and set_locals are explicit. The register instructions
+// stack), and local.gets and local.sets are explicit. The register instructions
// are converted to their corresponding stack instructions before lowering to
// MC.
// Every instruction should want to be based on this multi-class to guarantee
@@ -48,8 +51,10 @@ class NI<dag oops, dag iops, list<dag> pattern, bit stack, string asmstr = "",
multiclass I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
list<dag> pattern_r, string asmstr_r = "", string asmstr_s = "",
bits<32> inst = -1> {
- def "" : NI<oops_r, iops_r, pattern_r, 0, asmstr_r, inst>;
- def _S : NI<oops_s, iops_s, [], 1, asmstr_s, inst>;
+ let isCodeGenOnly = 1 in
+ def "" : NI<oops_r, iops_r, pattern_r, "false", asmstr_r, inst>;
+ let BaseName = NAME in
+ def _S : NI<oops_s, iops_s, [], "true", asmstr_s, inst>;
}
// For instructions that have no register ops, so both sets are the same.
@@ -57,111 +62,3 @@ multiclass NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "",
bits<32> inst = -1> {
defm "": I<oops, iops, oops, iops, pattern, asmstr, asmstr, inst>;
}
-
-multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
- list<dag> pattern_r, string asmstr_r = "",
- string asmstr_s = "", bits<32> inst = -1> {
- defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
- inst>,
- Requires<[HasSIMD128]>;
-}
-
-multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
- list<dag> pattern_r, string asmstr_r = "",
- string asmstr_s = "", bits<32> inst = -1> {
- defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
- inst>,
- Requires<[HasAtomics]>;
-}
-
-// Unary and binary instructions, for the local types that WebAssembly supports.
-multiclass UnaryInt<SDNode node, string name, bits<32> i32Inst,
- bits<32> i64Inst> {
- defm _I32 : I<(outs I32:$dst), (ins I32:$src), (outs), (ins),
- [(set I32:$dst, (node I32:$src))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $src")),
- !strconcat("i32.", name), i32Inst>;
- defm _I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins),
- [(set I64:$dst, (node I64:$src))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $src")),
- !strconcat("i64.", name), i64Inst>;
-}
-multiclass BinaryInt<SDNode node, string name, bits<32> i32Inst,
- bits<32> i64Inst> {
- defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
- [(set I32:$dst, (node I32:$lhs, I32:$rhs))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i32.", name), i32Inst>;
- defm _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
- [(set I64:$dst, (node I64:$lhs, I64:$rhs))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i64.", name), i64Inst>;
-}
-multiclass UnaryFP<SDNode node, string name, bits<32> f32Inst,
- bits<32> f64Inst> {
- defm _F32 : I<(outs F32:$dst), (ins F32:$src), (outs), (ins),
- [(set F32:$dst, (node F32:$src))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $src")),
- !strconcat("f32.", name), f32Inst>;
- defm _F64 : I<(outs F64:$dst), (ins F64:$src), (outs), (ins),
- [(set F64:$dst, (node F64:$src))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $src")),
- !strconcat("f64.", name), f64Inst>;
-}
-multiclass BinaryFP<SDNode node, string name, bits<32> f32Inst,
- bits<32> f64Inst> {
- defm _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
- [(set F32:$dst, (node F32:$lhs, F32:$rhs))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f32.", name), f32Inst>;
- defm _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
- [(set F64:$dst, (node F64:$lhs, F64:$rhs))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f64.", name), f64Inst>;
-}
-multiclass SIMDBinary<SDNode node, SDNode fnode, string name> {
- defm _I8x16 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v16i8 V128:$dst), (node V128:$lhs, V128:$rhs))],
- !strconcat("i8x16.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i8x16.", name)>;
- defm _I16x8 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v8i16 V128:$dst), (node V128:$lhs, V128:$rhs))],
- !strconcat("i16x8.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i16x8.", name)>;
- defm _I32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v4i32 V128:$dst), (node V128:$lhs, V128:$rhs))],
- !strconcat("i32x4.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i32x4.", name)>;
- defm _F32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v4f32 V128:$dst), (fnode V128:$lhs, V128:$rhs))],
- !strconcat("f32x4.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f32x4.", name)>;
-}
-multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32> i64Inst> {
- defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i32.", name), i32Inst>;
- defm _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i64.", name), i64Inst>;
-}
-multiclass ComparisonFP<CondCode cond, string name, bits<32> f32Inst, bits<32> f64Inst> {
- defm _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f32.", name), f32Inst>;
- defm _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f64.", name), f64Inst>;
-}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index cd49bd1682ad..5efff32d6167 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -70,6 +70,8 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
CopyOpcode = WebAssembly::COPY_F32;
else if (RC == &WebAssembly::F64RegClass)
CopyOpcode = WebAssembly::COPY_F64;
+ else if (RC == &WebAssembly::V128RegClass)
+ CopyOpcode = WebAssembly::COPY_V128;
else
llvm_unreachable("Unexpected register class");
@@ -77,10 +79,8 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, KillSrc ? RegState::Kill : 0);
}
-MachineInstr *
-WebAssemblyInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
- unsigned OpIdx1,
- unsigned OpIdx2) const {
+MachineInstr *WebAssemblyInstrInfo::commuteInstructionImpl(
+ MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const {
// If the operands are stackified, we can't reorder them.
WebAssemblyFunctionInfo &MFI =
*MI.getParent()->getParent()->getInfo<WebAssemblyFunctionInfo>();
@@ -165,12 +165,9 @@ unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB,
return Count;
}
-unsigned WebAssemblyInstrInfo::insertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL,
- int *BytesAdded) const {
+unsigned WebAssemblyInstrInfo::insertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
assert(!BytesAdded && "code size not handled");
if (Cond.empty()) {
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index aeb282a7febb..e3d795f2aab1 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -20,6 +20,9 @@ def HasAddr32 : Predicate<"!Subtarget->hasAddr64()">;
def HasAddr64 : Predicate<"Subtarget->hasAddr64()">;
def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
AssemblerPredicate<"FeatureSIMD128", "simd128">;
+def HasUnimplementedSIMD128 :
+ Predicate<"Subtarget->hasUnimplementedSIMD128()">,
+ AssemblerPredicate<"FeatureUnimplementedSIMD128", "unimplemented-simd128">;
def HasAtomics : Predicate<"Subtarget->hasAtomics()">,
AssemblerPredicate<"FeatureAtomics", "atomics">;
def HasNontrappingFPToInt :
@@ -64,6 +67,7 @@ def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyThrow : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Nodes.
@@ -90,6 +94,8 @@ def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
SDT_WebAssemblyReturn, [SDNPHasChain]>;
def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
SDT_WebAssemblyWrapper>;
+def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow,
+ [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// WebAssembly-specific Operands.
@@ -118,6 +124,18 @@ def f32imm_op : Operand<f32>;
let OperandType = "OPERAND_F64IMM" in
def f64imm_op : Operand<f64>;
+let OperandType = "OPERAND_VEC_I8IMM" in
+def vec_i8imm_op : Operand<i32>;
+
+let OperandType = "OPERAND_VEC_I16IMM" in
+def vec_i16imm_op : Operand<i32>;
+
+let OperandType = "OPERAND_VEC_I32IMM" in
+def vec_i32imm_op : Operand<i32>;
+
+let OperandType = "OPERAND_VEC_I64IMM" in
+def vec_i64imm_op : Operand<i64>;
+
let OperandType = "OPERAND_FUNCTION32" in
def function32_op : Operand<i32>;
@@ -128,6 +146,10 @@ let OperandType = "OPERAND_P2ALIGN" in {
def P2Align : Operand<i32> {
let PrintMethod = "printWebAssemblyP2AlignOperand";
}
+
+let OperandType = "OPERAND_EVENT" in
+def event_op : Operand<i32>;
+
} // OperandType = "OPERAND_P2ALIGN"
let OperandType = "OPERAND_SIGNATURE" in {
@@ -142,6 +164,19 @@ def TypeIndex : Operand<i32>;
} // OperandNamespace = "WebAssembly"
//===----------------------------------------------------------------------===//
+// WebAssembly Register to Stack instruction mapping
+//===----------------------------------------------------------------------===//
+
+class StackRel;
+def getStackOpcode : InstrMapping {
+ let FilterClass = "StackRel";
+ let RowFields = ["BaseName"];
+ let ColFields = ["StackBased"];
+ let KeyCol = ["false"];
+ let ValueCols = [["true"]];
+}
+
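The InstrMapping above has TableGen relate each register-form instruction (StackBased = "false") to its stack-form twin (StackBased = "true"), using the BaseName that WebAssemblyInstrFormats.td stamps on both defs as the row key. Conceptually the generated table is just an opcode-to-opcode lookup, along the lines of this hand-written toy (made-up opcodes; the real table and accessor are emitted by TableGen, not written like this):

#include <cstdint>
#include <unordered_map>

// Toy stand-in for the TableGen-generated register->stack opcode mapping.
// Both defs sharing a BaseName form one row and differ only in the
// "StackBased" column, so the query is: same row, other column.
enum Opcode : uint16_t { ADD_I32_REG = 100, ADD_I32_STACK = 101 };

int getStackOpcodeSketch(uint16_t RegOpcode) {
  static const std::unordered_map<uint16_t, uint16_t> RegToStack = {
      {ADD_I32_REG, ADD_I32_STACK},
  };
  auto It = RegToStack.find(RegOpcode);
  return It == RegToStack.end() ? -1 : It->second;
}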
+//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
//===----------------------------------------------------------------------===//
@@ -151,74 +186,62 @@ include "WebAssemblyInstrFormats.td"
// Additional instructions.
//===----------------------------------------------------------------------===//
-multiclass ARGUMENT<WebAssemblyRegClass vt> {
- let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
- defm ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno),
- (outs), (ins i32imm:$argno),
- [(set vt:$res, (WebAssemblyargument timm:$argno))]>;
+multiclass ARGUMENT<WebAssemblyRegClass reg, ValueType vt> {
+ let hasSideEffects = 1, isCodeGenOnly = 1,
+ Defs = []<Register>, Uses = [ARGUMENTS] in
+ defm ARGUMENT_#vt :
+ I<(outs reg:$res), (ins i32imm:$argno), (outs), (ins i32imm:$argno),
+ [(set (vt reg:$res), (WebAssemblyargument timm:$argno))]>;
}
-multiclass SIMD_ARGUMENT<ValueType vt> {
- let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
- defm ARGUMENT_#vt : SIMD_I<(outs V128:$res), (ins i32imm:$argno),
- (outs), (ins i32imm:$argno),
- [(set (vt V128:$res),
- (WebAssemblyargument timm:$argno))]>;
-}
-defm "": ARGUMENT<I32>;
-defm "": ARGUMENT<I64>;
-defm "": ARGUMENT<F32>;
-defm "": ARGUMENT<F64>;
-defm "": ARGUMENT<EXCEPT_REF>;
-defm "": SIMD_ARGUMENT<v16i8>;
-defm "": SIMD_ARGUMENT<v8i16>;
-defm "": SIMD_ARGUMENT<v4i32>;
-defm "": SIMD_ARGUMENT<v4f32>;
-
-let Defs = [ARGUMENTS] in {
-
-// get_local and set_local are not generated by instruction selection; they
+defm "": ARGUMENT<I32, i32>;
+defm "": ARGUMENT<I64, i64>;
+defm "": ARGUMENT<F32, f32>;
+defm "": ARGUMENT<F64, f64>;
+defm "": ARGUMENT<EXCEPT_REF, ExceptRef>;
+
+// local.get and local.set are not generated by instruction selection; they
// are implied by virtual register uses and defs.
multiclass LOCAL<WebAssemblyRegClass vt> {
let hasSideEffects = 0 in {
- // COPY is not an actual instruction in wasm, but since we allow get_local and
- // set_local to be implicit during most of codegen, we can have a COPY which
- // is actually a no-op because all the work is done in the implied get_local
- // and set_local. COPYs are eliminated (and replaced with
- // get_local/set_local) in the ExplicitLocals pass.
+ // COPY is not an actual instruction in wasm, but since we allow local.get and
+ // local.set to be implicit during most of codegen, we can have a COPY which
+ // is actually a no-op because all the work is done in the implied local.get
+ // and local.set. COPYs are eliminated (and replaced with
+ // local.get/local.set) in the ExplicitLocals pass.
let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in
defm COPY_#vt : I<(outs vt:$res), (ins vt:$src), (outs), (ins), [],
- "copy_local\t$res, $src", "copy_local">;
+ "local.copy\t$res, $src", "local.copy">;
// TEE is similar to COPY, but writes two copies of its result. Typically
// this would be used to stackify one result and write the other result to a
// local.
let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in
defm TEE_#vt : I<(outs vt:$res, vt:$also), (ins vt:$src), (outs), (ins), [],
- "tee_local\t$res, $also, $src", "tee_local">;
+ "local.tee\t$res, $also, $src", "local.tee">;
- // This is the actual get_local instruction in wasm. These are made explicit
+ // This is the actual local.get instruction in wasm. These are made explicit
// by the ExplicitLocals pass. It has mayLoad because it reads from a wasm
// local, which is a side effect not otherwise modeled in LLVM.
let mayLoad = 1, isAsCheapAsAMove = 1 in
- defm GET_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local),
+ defm LOCAL_GET_#vt : I<(outs vt:$res), (ins local_op:$local),
(outs), (ins local_op:$local), [],
- "get_local\t$res, $local", "get_local\t$local", 0x20>;
+ "local.get\t$res, $local", "local.get\t$local", 0x20>;
- // This is the actual set_local instruction in wasm. These are made explicit
+ // This is the actual local.set instruction in wasm. These are made explicit
// by the ExplicitLocals pass. It has mayStore because it writes to a wasm
// local, which is a side effect not otherwise modeled in LLVM.
let mayStore = 1, isAsCheapAsAMove = 1 in
- defm SET_LOCAL_#vt : I<(outs), (ins local_op:$local, vt:$src),
+ defm LOCAL_SET_#vt : I<(outs), (ins local_op:$local, vt:$src),
(outs), (ins local_op:$local), [],
- "set_local\t$local, $src", "set_local\t$local", 0x21>;
+ "local.set\t$local, $src", "local.set\t$local", 0x21>;
- // This is the actual tee_local instruction in wasm. TEEs are turned into
- // TEE_LOCALs by the ExplicitLocals pass. It has mayStore for the same reason
- // as SET_LOCAL.
+ // This is the actual local.tee instruction in wasm. TEEs are turned into
+ // LOCAL_TEEs by the ExplicitLocals pass. It has mayStore for the same reason
+ // as LOCAL_SET.
let mayStore = 1, isAsCheapAsAMove = 1 in
- defm TEE_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src),
+ defm LOCAL_TEE_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src),
(outs), (ins local_op:$local), [],
- "tee_local\t$res, $local, $src", "tee_local\t$local",
+ "local.tee\t$res, $local, $src", "local.tee\t$local",
0x22>;
// Unused values must be dropped in some contexts.
@@ -226,15 +249,15 @@ let hasSideEffects = 0 in {
"drop\t$src", "drop", 0x1a>;
let mayLoad = 1 in
- defm GET_GLOBAL_#vt : I<(outs vt:$res), (ins global_op:$local),
+ defm GLOBAL_GET_#vt : I<(outs vt:$res), (ins global_op:$local),
(outs), (ins global_op:$local), [],
- "get_global\t$res, $local", "get_global\t$local",
+ "global.get\t$res, $local", "global.get\t$local",
0x23>;
let mayStore = 1 in
- defm SET_GLOBAL_#vt : I<(outs), (ins global_op:$local, vt:$src),
+ defm GLOBAL_SET_#vt : I<(outs), (ins global_op:$local, vt:$src),
(outs), (ins global_op:$local), [],
- "set_global\t$local, $src", "set_global\t$local",
+ "global.set\t$local, $src", "global.set\t$local",
0x24>;
} // hasSideEffects = 0
@@ -265,12 +288,12 @@ defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
"f64.const\t$res, $imm", "f64.const\t$imm", 0x44>;
} // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1
-} // Defs = [ARGUMENTS]
-
def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
(CONST_I32 tglobaladdr:$addr)>;
def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
(CONST_I32 texternalsym:$addr)>;
+def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>;
+def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>;
//===----------------------------------------------------------------------===//
// Additional sets of instructions.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index f9f21fd1d754..d5b63d643697 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -12,7 +12,38 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
+multiclass UnaryInt<SDNode node, string name, bits<32> i32Inst,
+ bits<32> i64Inst> {
+ defm _I32 : I<(outs I32:$dst), (ins I32:$src), (outs), (ins),
+ [(set I32:$dst, (node I32:$src))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("i32.", name), i32Inst>;
+ defm _I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins),
+ [(set I64:$dst, (node I64:$src))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("i64.", name), i64Inst>;
+}
+multiclass BinaryInt<SDNode node, string name, bits<32> i32Inst,
+ bits<32> i64Inst> {
+ defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
+ [(set I32:$dst, (node I32:$lhs, I32:$rhs))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i32.", name), i32Inst>;
+ defm _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
+ [(set I64:$dst, (node I64:$lhs, I64:$rhs))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i64.", name), i64Inst>;
+}
+multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32> i64Inst> {
+ defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i32.", name), i32Inst>;
+ defm _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i64.", name), i64Inst>;
+}
// The spaces after the names are for aesthetic purposes only, to make
// operands line up vertically after tab expansion.
@@ -63,16 +94,12 @@ defm EQZ_I64 : I<(outs I32:$dst), (ins I64:$src), (outs), (ins),
[(set I32:$dst, (setcc I64:$src, 0, SETEQ))],
"i64.eqz \t$dst, $src", "i64.eqz", 0x50>;
-} // Defs = [ARGUMENTS]
-
// Optimize away an explicit mask on a rotate count.
def : Pat<(rotl I32:$lhs, (and I32:$rhs, 31)), (ROTL_I32 I32:$lhs, I32:$rhs)>;
def : Pat<(rotr I32:$lhs, (and I32:$rhs, 31)), (ROTR_I32 I32:$lhs, I32:$rhs)>;
def : Pat<(rotl I64:$lhs, (and I64:$rhs, 63)), (ROTL_I64 I64:$lhs, I64:$rhs)>;
def : Pat<(rotr I64:$lhs, (and I64:$rhs, 63)), (ROTR_I64 I64:$lhs, I64:$rhs)>;
-let Defs = [ARGUMENTS] in {
-
defm SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond),
(outs), (ins),
[(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],
@@ -82,8 +109,6 @@ defm SELECT_I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs, I32:$cond),
[(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))],
"i64.select\t$dst, $lhs, $rhs, $cond", "i64.select", 0x1b>;
-} // Defs = [ARGUMENTS]
-
// ISD::SELECT requires its operand to conform to getBooleanContents, but
// WebAssembly's select interprets any non-zero value as true, so we can fold
// a setne with 0 into a select.
@@ -97,3 +122,10 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
(SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
(SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>;
+
+// The legalizer inserts an unnecessary `and 1` to make input conform
+// to getBooleanContents, which we can lower away.
+def : Pat<(select (i32 (and I32:$cond, 1)), I32:$lhs, I32:$rhs),
+ (SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (and I32:$cond, 1)), I64:$lhs, I64:$rhs),
+ (SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 8a49325af2bd..518f81c61dc4 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -33,10 +33,8 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- KnownBits Known0;
- CurDAG->computeKnownBits(N->getOperand(0), Known0, 0);
- KnownBits Known1;
- CurDAG->computeKnownBits(N->getOperand(1), Known1, 0);
+ KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
+ KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
return (~Known0.Zero & ~Known1.Zero) == 0;
}]>;
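The predicate above (now using the value-returning computeKnownBits overload) accepts an `or` only when no bit can be set in both operands; with no overlapping bits there are no carries, so a | b equals a + b and the node can safely feed the add-based addressing patterns. A quick standalone check of that identity (not LLVM code):

#include <cassert>
#include <cstdint>

// When a and b share no set bits, there are no carries, so or == add.
bool orEqualsAddWhenDisjoint(uint32_t a, uint32_t b) {
  if ((a & b) != 0)
    return false;            // bits overlap; the rewrite would be unsound
  assert((a | b) == a + b);  // no carries can occur
  return true;
}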
@@ -53,15 +51,14 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off),
// We don't need a regPlusES because external symbols never have constant
// offsets folded into them, so we can just use add.
-let Defs = [ARGUMENTS] in {
-
// Defines atomic and non-atomic loads, regular and extending.
multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> {
+ let mayLoad = 1 in
defm "": I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off),
[], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"),
- !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+ !strconcat(Name, "\t${off}${p2align}"), Opcode>;
}
// Basic load.
@@ -72,8 +69,6 @@ defm LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29>;
defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a>;
defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b>;
-} // Defs = [ARGUMENTS]
-
// Select loads with no constant offset.
class LoadPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind I32:$addr)), (inst 0, 0, I32:$addr)>;
@@ -143,8 +138,6 @@ def : LoadPatExternSymOffOnly<i64, load, LOAD_I64>;
def : LoadPatExternSymOffOnly<f32, load, LOAD_F32>;
def : LoadPatExternSymOffOnly<f64, load, LOAD_F64>;
-let Defs = [ARGUMENTS] in {
-
// Extending load.
defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>;
defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>;
@@ -157,8 +150,6 @@ defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33>;
defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34>;
defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35>;
-} // Defs = [ARGUMENTS]
-
// Select extending loads with no constant offset.
def : LoadPatNoOffset<i32, sextloadi8, LOAD8_S_I32>;
def : LoadPatNoOffset<i32, zextloadi8, LOAD8_U_I32>;
@@ -302,17 +293,15 @@ def : LoadPatExternSymOffOnly<i64, extloadi8, LOAD8_U_I64>;
def : LoadPatExternSymOffOnly<i64, extloadi16, LOAD16_U_I64>;
def : LoadPatExternSymOffOnly<i64, extloadi32, LOAD32_U_I64>;
-
-let Defs = [ARGUMENTS] in {
-
// Defines atomic and non-atomic stores, regular and truncating
multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> {
+ let mayStore = 1 in
defm "" : I<(outs),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
(outs),
(ins P2Align:$p2align, offset32_op:$off), [],
!strconcat(Name, "\t${off}(${addr})${p2align}, $val"),
- !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+ !strconcat(Name, "\t${off}${p2align}"), Opcode>;
}
// Basic store.
// Note: WebAssembly inverts SelectionDAG's usual operand order.
@@ -321,8 +310,6 @@ defm STORE_I64 : WebAssemblyStore<I64, "i64.store", 0x37>;
defm STORE_F32 : WebAssemblyStore<F32, "f32.store", 0x38>;
defm STORE_F64 : WebAssemblyStore<F64, "f64.store", 0x39>;
-} // Defs = [ARGUMENTS]
-
// Select stores with no constant offset.
class StorePatNoOffset<ValueType ty, PatFrag node, NI inst> :
Pat<(node ty:$val, I32:$addr), (inst 0, 0, I32:$addr, ty:$val)>;
@@ -387,9 +374,6 @@ def : StorePatExternSymOffOnly<i64, store, STORE_I64>;
def : StorePatExternSymOffOnly<f32, store, STORE_F32>;
def : StorePatExternSymOffOnly<f64, store, STORE_F64>;
-
-let Defs = [ARGUMENTS] in {
-
// Truncating store.
defm STORE8_I32 : WebAssemblyStore<I32, "i32.store8", 0x3a>;
defm STORE16_I32 : WebAssemblyStore<I32, "i32.store16", 0x3b>;
@@ -397,8 +381,6 @@ defm STORE8_I64 : WebAssemblyStore<I64, "i64.store8", 0x3c>;
defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>;
defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;
-} // Defs = [ARGUMENTS]
-
// Select truncating stores with no constant offset.
def : StorePatNoOffset<i32, truncstorei8, STORE8_I32>;
def : StorePatNoOffset<i32, truncstorei16, STORE16_I32>;
@@ -446,8 +428,6 @@ def : StorePatExternSymOffOnly<i64, truncstorei8, STORE8_I64>;
def : StorePatExternSymOffOnly<i64, truncstorei16, STORE16_I64>;
def : StorePatExternSymOffOnly<i64, truncstorei32, STORE32_I64>;
-let Defs = [ARGUMENTS] in {
-
// Current memory size.
defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
(outs), (ins i32imm:$flags),
@@ -456,44 +436,13 @@ defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
"memory.size\t$dst, $flags", "memory.size\t$flags",
0x3f>,
Requires<[HasAddr32]>;
-defm MEM_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
- (outs), (ins i32imm:$flags),
- [(set I32:$dst, (int_wasm_mem_size (i32 imm:$flags)))],
- "mem.size\t$dst, $flags", "mem.size\t$flags", 0x3f>,
- Requires<[HasAddr32]>;
-defm CURRENT_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
- (outs), (ins i32imm:$flags),
- [],
- "current_memory\t$dst",
- "current_memory\t$flags", 0x3f>,
- Requires<[HasAddr32]>;
// Grow memory.
defm MEMORY_GROW_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
- (outs), (ins i32imm:$flags, I32:$delta),
+ (outs), (ins i32imm:$flags),
[(set I32:$dst,
(int_wasm_memory_grow (i32 imm:$flags),
I32:$delta))],
"memory.grow\t$dst, $flags, $delta",
- "memory.grow\t$flags, $delta", 0x3f>,
+ "memory.grow\t$flags", 0x40>,
Requires<[HasAddr32]>;
-defm MEM_GROW_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
- (outs), (ins i32imm:$flags),
- [(set I32:$dst,
- (int_wasm_mem_grow (i32 imm:$flags), I32:$delta))],
- "mem.grow\t$dst, $flags, $delta", "mem.grow\t$flags",
- 0x3f>,
- Requires<[HasAddr32]>;
-defm GROW_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
- (outs), (ins i32imm:$flags),
- [],
- "grow_memory\t$dst, $delta", "grow_memory\t$flags",
- 0x40>,
- Requires<[HasAddr32]>;
-
-} // Defs = [ARGUMENTS]
-
-def : Pat<(int_wasm_current_memory),
- (CURRENT_MEMORY_I32 0)>;
-def : Pat<(int_wasm_grow_memory I32:$delta),
- (GROW_MEMORY_I32 0, $delta)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 7d1edccdeb3c..587515c5b299 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -12,8 +12,796 @@
///
//===----------------------------------------------------------------------===//
+// Instructions requiring HasSIMD128 and the simd128 prefix byte
+multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> simdop = -1> {
+ defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+ !or(0xfd00, !and(0xff, simdop))>,
+ Requires<[HasSIMD128]>;
+}
+
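The !or(0xfd00, !and(0xff, simdop)) above folds the one-byte SIMD opcode into a 16-bit encoding whose high byte is the 0xfd simd128 prefix, so the definitions below come out as 0xfd00 for v128.load, 0xfd04 for i8x16.splat, and so on. The same arithmetic in plain C++, purely as an illustration:

#include <cstdint>

// Combine the simd128 prefix byte (0xfd) with a one-byte SIMD opcode,
// mirroring !or(0xfd00, !and(0xff, simdop)) in the multiclass above.
constexpr uint32_t simdPrefixed(uint32_t simdop) {
  return 0xfd00u | (simdop & 0xffu);
}
static_assert(simdPrefixed(0) == 0xfd00, "v128.load");
static_assert(simdPrefixed(4) == 0xfd04, "i8x16.splat");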
+defm "" : ARGUMENT<V128, v16i8>;
+defm "" : ARGUMENT<V128, v8i16>;
+defm "" : ARGUMENT<V128, v4i32>;
+defm "" : ARGUMENT<V128, v2i64>;
+defm "" : ARGUMENT<V128, v4f32>;
+defm "" : ARGUMENT<V128, v2f64>;
+
+// Constrained immediate argument types
+foreach SIZE = [8, 16] in
+def ImmI#SIZE : ImmLeaf<i32,
+ "return ((uint64_t)Imm & ((1UL << "#SIZE#") - 1)) == (uint64_t)Imm;"
+>;
+foreach SIZE = [2, 4, 8, 16, 32] in
+def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
+
+//===----------------------------------------------------------------------===//
+// Load and store
+//===----------------------------------------------------------------------===//
+
+// Load: v128.load
+multiclass SIMDLoad<ValueType vec_t> {
+ let mayLoad = 1 in
+ defm LOAD_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$align, offset32_op:$off), [],
+ "v128.load\t$dst, ${off}(${addr})$align",
+ "v128.load\t$off$align", 0>;
+}
+
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+defm "" : SIMDLoad<vec_t>;
+
+// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
+def : LoadPatNoOffset<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatImmOff<vec_t, load, regPlusImm, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatImmOff<vec_t, load, or_is_add, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatGlobalAddr<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatExternalSym<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatOffsetOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatGlobalAddrOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatExternSymOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+}
+
+// Store: v128.store
+multiclass SIMDStore<ValueType vec_t> {
+ let mayStore = 1 in
+ defm STORE_#vec_t :
+ SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec),
+ (outs), (ins P2Align:$align, offset32_op:$off), [],
+ "v128.store\t${off}(${addr})$align, $vec",
+ "v128.store\t$off$align", 1>;
+}
+
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+defm "" : SIMDStore<vec_t>;
+
+// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
+def : StorePatNoOffset<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatImmOff<vec_t, store, regPlusImm, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatImmOff<vec_t, store, or_is_add, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatGlobalAddr<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatExternalSym<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatOffsetOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatGlobalAddrOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Constructing SIMD values
+//===----------------------------------------------------------------------===//
+
+// Constant: v128.const
+multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
+ let isMoveImm = 1, isReMaterializable = 1,
+ Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+ defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
+ [(set V128:$dst, (vec_t pat))],
+ "v128.const\t$dst, "#args,
+ "v128.const\t"#args, 2>;
+}
+
+defm "" : ConstVec<v16i8,
+ (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
+ vec_i8imm_op:$i2, vec_i8imm_op:$i3,
+ vec_i8imm_op:$i4, vec_i8imm_op:$i5,
+ vec_i8imm_op:$i6, vec_i8imm_op:$i7,
+ vec_i8imm_op:$i8, vec_i8imm_op:$i9,
+ vec_i8imm_op:$iA, vec_i8imm_op:$iB,
+ vec_i8imm_op:$iC, vec_i8imm_op:$iD,
+ vec_i8imm_op:$iE, vec_i8imm_op:$iF),
+ (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
+ ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
+ ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
+ ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
+ !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
+ "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
+defm "" : ConstVec<v8i16,
+ (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
+ vec_i16imm_op:$i2, vec_i16imm_op:$i3,
+ vec_i16imm_op:$i4, vec_i16imm_op:$i5,
+ vec_i16imm_op:$i6, vec_i16imm_op:$i7),
+ (build_vector
+ ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
+ ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
+ "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
+defm "" : ConstVec<v4i32,
+ (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
+ vec_i32imm_op:$i2, vec_i32imm_op:$i3),
+ (build_vector (i32 imm:$i0), (i32 imm:$i1),
+ (i32 imm:$i2), (i32 imm:$i3)),
+ "$i0, $i1, $i2, $i3">;
+defm "" : ConstVec<v2i64,
+ (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
+ (build_vector (i64 imm:$i0), (i64 imm:$i1)),
+ "$i0, $i1">;
+defm "" : ConstVec<v4f32,
+ (ins f32imm_op:$i0, f32imm_op:$i1,
+ f32imm_op:$i2, f32imm_op:$i3),
+ (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
+ (f32 fpimm:$i2), (f32 fpimm:$i3)),
+ "$i0, $i1, $i2, $i3">;
+defm "" : ConstVec<v2f64,
+ (ins f64imm_op:$i0, f64imm_op:$i1),
+ (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
+ "$i0, $i1">;
+
+// Shuffle lanes: shuffle
+defm SHUFFLE :
+ SIMD_I<(outs V128:$dst),
+ (ins V128:$x, V128:$y,
+ vec_i8imm_op:$m0, vec_i8imm_op:$m1,
+ vec_i8imm_op:$m2, vec_i8imm_op:$m3,
+ vec_i8imm_op:$m4, vec_i8imm_op:$m5,
+ vec_i8imm_op:$m6, vec_i8imm_op:$m7,
+ vec_i8imm_op:$m8, vec_i8imm_op:$m9,
+ vec_i8imm_op:$mA, vec_i8imm_op:$mB,
+ vec_i8imm_op:$mC, vec_i8imm_op:$mD,
+ vec_i8imm_op:$mE, vec_i8imm_op:$mF),
+ (outs),
+ (ins
+ vec_i8imm_op:$m0, vec_i8imm_op:$m1,
+ vec_i8imm_op:$m2, vec_i8imm_op:$m3,
+ vec_i8imm_op:$m4, vec_i8imm_op:$m5,
+ vec_i8imm_op:$m6, vec_i8imm_op:$m7,
+ vec_i8imm_op:$m8, vec_i8imm_op:$m9,
+ vec_i8imm_op:$mA, vec_i8imm_op:$mB,
+ vec_i8imm_op:$mC, vec_i8imm_op:$mD,
+ vec_i8imm_op:$mE, vec_i8imm_op:$mF),
+ [],
+ "v8x16.shuffle\t$dst, $x, $y, "#
+ "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
+ "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
+ "v8x16.shuffle\t"#
+ "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
+ "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
+ 3>;
+
+// Shuffles after custom lowering
+def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
+def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
+ (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
+ (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
+ (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
+ (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
+ (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
+ (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
+ (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
+ (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
+ (vec_t (SHUFFLE (vec_t V128:$x), (vec_t V128:$y),
+ (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
+ (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
+ (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
+ (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
+ (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
+ (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
+ (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
+ (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
+}
+
+// Create vector with identical lanes: splat
+def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
+def splat4 : PatFrag<(ops node:$x), (build_vector
+ node:$x, node:$x, node:$x, node:$x)>;
+def splat8 : PatFrag<(ops node:$x), (build_vector
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x)>;
+def splat16 : PatFrag<(ops node:$x), (build_vector
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x)>;
+
+multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
+ PatFrag splat_pat, bits<32> simdop> {
+ // Prefer splats over v128.const for const splats (65 is lowest that works)
+ let AddedComplexity = 65 in
+ defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins),
+ [(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
+ vec#".splat\t$dst, $x", vec#".splat", simdop>;
+}
+
+defm "" : Splat<v16i8, "i8x16", I32, splat16, 4>;
+defm "" : Splat<v8i16, "i16x8", I32, splat8, 8>;
+defm "" : Splat<v4i32, "i32x4", I32, splat4, 12>;
+defm "" : Splat<v2i64, "i64x2", I64, splat2, 15>;
+defm "" : Splat<v4f32, "f32x4", F32, splat4, 18>;
+defm "" : Splat<v2f64, "f64x2", F64, splat2, 21>;
+
+//===----------------------------------------------------------------------===//
+// Accessing lanes
+//===----------------------------------------------------------------------===//
+
+// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
+multiclass ExtractLane<ValueType vec_t, string vec, ImmLeaf imm_t,
+ WebAssemblyRegClass reg_t, bits<32> simdop,
+ string suffix = "", SDNode extract = vector_extract> {
+ defm EXTRACT_LANE_#vec_t#suffix :
+ SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
+ (outs), (ins vec_i8imm_op:$idx),
+ [(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))],
+ vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
+ vec#".extract_lane"#suffix#"\t$idx", simdop>;
+}
+
+multiclass ExtractPat<ValueType lane_t, int mask> {
+ def _s : PatFrag<(ops node:$vec, node:$idx),
+ (i32 (sext_inreg
+ (i32 (vector_extract
+ node:$vec,
+ node:$idx
+ )),
+ lane_t
+ ))>;
+ def _u : PatFrag<(ops node:$vec, node:$idx),
+ (i32 (and
+ (i32 (vector_extract
+ node:$vec,
+ node:$idx
+ )),
+ (i32 mask)
+ ))>;
+}
+
+defm extract_i8x16 : ExtractPat<i8, 0xff>;
+defm extract_i16x8 : ExtractPat<i16, 0xffff>;
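A rough scalar sketch of why the two PatFrags above correspond to extract_lane_s and extract_lane_u: sign-extension matches the sext_inreg form and masking matches the and form. This is an illustration under those assumptions, not code from the backend.

#include <cstdint>

// i8x16.extract_lane_s on one lane: widen with sign extension, which is what
// (sext_inreg (vector_extract ...), i8) expresses in the DAG.
int32_t extract_lane_s_i8(int8_t lane) { return static_cast<int32_t>(lane); }

// i8x16.extract_lane_u on one lane: widen and mask to the low 8 bits, which is
// what (and (vector_extract ...), 0xff) expresses.
int32_t extract_lane_u_i8(int8_t lane) {
  return static_cast<int32_t>(lane) & 0xff;
}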
+
+multiclass ExtractLaneExtended<string sign, bits<32> baseInst> {
+ defm "" : ExtractLane<v16i8, "i8x16", LaneIdx16, I32, baseInst, sign,
+ !cast<PatFrag>("extract_i8x16"#sign)>;
+ defm "" : ExtractLane<v8i16, "i16x8", LaneIdx8, I32, !add(baseInst, 4), sign,
+ !cast<PatFrag>("extract_i16x8"#sign)>;
+}
+
+defm "" : ExtractLaneExtended<"_s", 5>;
+let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+defm "" : ExtractLaneExtended<"_u", 6>;
+defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>;
+defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 16>;
+defm "" : ExtractLane<v4f32, "f32x4", LaneIdx4, F32, 19>;
+defm "" : ExtractLane<v2f64, "f64x2", LaneIdx2, F64, 22>;
+
+// It would be more conventional to use unsigned extracts, but v8
+// doesn't implement them yet
+def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))),
+ (EXTRACT_LANE_v16i8_s V128:$vec, (i32 LaneIdx16:$idx))>;
+def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))),
+ (EXTRACT_LANE_v8i16_s V128:$vec, (i32 LaneIdx8:$idx))>;
+
+// Lower undef lane indices to zero
+def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)),
+ (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
+def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)),
+ (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
+def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)),
+ (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
+def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)),
+ (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
+def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8),
+ (EXTRACT_LANE_v16i8_s V128:$vec, 0)>;
+def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16),
+ (EXTRACT_LANE_v8i16_s V128:$vec, 0)>;
+def : Pat<(vector_extract (v4i32 V128:$vec), undef),
+ (EXTRACT_LANE_v4i32 V128:$vec, 0)>;
+def : Pat<(vector_extract (v2i64 V128:$vec), undef),
+ (EXTRACT_LANE_v2i64 V128:$vec, 0)>;
+def : Pat<(vector_extract (v4f32 V128:$vec), undef),
+ (EXTRACT_LANE_v4f32 V128:$vec, 0)>;
+def : Pat<(vector_extract (v2f64 V128:$vec), undef),
+ (EXTRACT_LANE_v2f64 V128:$vec, 0)>;
+
+// Replace lane value: replace_lane
+multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
+ WebAssemblyRegClass reg_t, ValueType lane_t,
+ bits<32> simdop> {
+ defm REPLACE_LANE_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x),
+ (outs), (ins vec_i8imm_op:$idx),
+ [(set V128:$dst, (vector_insert
+ (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))],
+ vec#".replace_lane\t$dst, $vec, $idx, $x",
+ vec#".replace_lane\t$idx", simdop>;
+}
+
+defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 7>;
+defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 11>;
+defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 14>;
+defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 17>;
+defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 20>;
+defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 23>;
+
+// Lower undef lane indices to zero
+def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
+ (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>;
+def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
+ (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>;
+def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
+ (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;
+
+// Arbitrary other BUILD_VECTOR patterns
+def : Pat<(v16i8 (build_vector
+ (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
+ (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7),
+ (i32 I32:$x8), (i32 I32:$x9), (i32 I32:$x10), (i32 I32:$x11),
+ (i32 I32:$x12), (i32 I32:$x13), (i32 I32:$x14), (i32 I32:$x15)
+ )),
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (SPLAT_v16i8 (i32 I32:$x0))),
+ 1, I32:$x1
+ )),
+ 2, I32:$x2
+ )),
+ 3, I32:$x3
+ )),
+ 4, I32:$x4
+ )),
+ 5, I32:$x5
+ )),
+ 6, I32:$x6
+ )),
+ 7, I32:$x7
+ )),
+ 8, I32:$x8
+ )),
+ 9, I32:$x9
+ )),
+ 10, I32:$x10
+ )),
+ 11, I32:$x11
+ )),
+ 12, I32:$x12
+ )),
+ 13, I32:$x13
+ )),
+ 14, I32:$x14
+ )),
+ 15, I32:$x15
+ ))>;
+def : Pat<(v8i16 (build_vector
+ (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
+ (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7)
+ )),
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (SPLAT_v8i16 (i32 I32:$x0))),
+ 1, I32:$x1
+ )),
+ 2, I32:$x2
+ )),
+ 3, I32:$x3
+ )),
+ 4, I32:$x4
+ )),
+ 5, I32:$x5
+ )),
+ 6, I32:$x6
+ )),
+ 7, I32:$x7
+ ))>;
+def : Pat<(v4i32 (build_vector
+ (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3)
+ )),
+ (v4i32 (REPLACE_LANE_v4i32
+ (v4i32 (REPLACE_LANE_v4i32
+ (v4i32 (REPLACE_LANE_v4i32
+ (v4i32 (SPLAT_v4i32 (i32 I32:$x0))),
+ 1, I32:$x1
+ )),
+ 2, I32:$x2
+ )),
+ 3, I32:$x3
+ ))>;
+def : Pat<(v2i64 (build_vector (i64 I64:$x0), (i64 I64:$x1))),
+ (v2i64 (REPLACE_LANE_v2i64
+ (v2i64 (SPLAT_v2i64 (i64 I64:$x0))), 1, I64:$x1))>;
+def : Pat<(v4f32 (build_vector
+ (f32 F32:$x0), (f32 F32:$x1), (f32 F32:$x2), (f32 F32:$x3)
+ )),
+ (v4f32 (REPLACE_LANE_v4f32
+ (v4f32 (REPLACE_LANE_v4f32
+ (v4f32 (REPLACE_LANE_v4f32
+ (v4f32 (SPLAT_v4f32 (f32 F32:$x0))),
+ 1, F32:$x1
+ )),
+ 2, F32:$x2
+ )),
+ 3, F32:$x3
+ ))>;
+def : Pat<(v2f64 (build_vector (f64 F64:$x0), (f64 F64:$x1))),
+ (v2f64 (REPLACE_LANE_v2f64
+ (v2f64 (SPLAT_v2f64 (f64 F64:$x0))), 1, F64:$x1))>;
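The nested patterns above decompose a general build_vector into a splat of lane 0 followed by one replace_lane per remaining element. A scalar sketch of that decomposition for the v4i32 case (illustrative only):

#include <array>
#include <cstdint>

// Scalar model of the v4i32 expansion: SPLAT_v4i32 of lane 0 followed by
// REPLACE_LANE_v4i32 for lanes 1..3.
std::array<int32_t, 4> build_v4i32(int32_t x0, int32_t x1, int32_t x2,
                                   int32_t x3) {
  std::array<int32_t, 4> v{x0, x0, x0, x0}; // i32x4.splat
  v[1] = x1;                                // i32x4.replace_lane 1
  v[2] = x2;                                // i32x4.replace_lane 2
  v[3] = x3;                                // i32x4.replace_lane 3
  return v;
}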
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec,
+ string name, CondCode cond, bits<32> simdop> {
+ defm _#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
+ [(set (out_t V128:$dst),
+ (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond)
+ )],
+ vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, simdop>;
+}
+
+multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
+ defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>;
+ defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond,
+ !add(baseInst, 10)>;
+ defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond,
+ !add(baseInst, 20)>;
+}
+
+multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
+ defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>;
+ defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond,
+ !add(baseInst, 6)>;
+}
+
+// Equality: eq
+let isCommutable = 1 in {
+defm EQ : SIMDConditionInt<"eq", SETEQ, 24>;
+defm EQ : SIMDConditionFP<"eq", SETOEQ, 64>;
+} // isCommutable = 1
+
+// Non-equality: ne
let isCommutable = 1 in {
-defm ADD : SIMDBinary<add, fadd, "add ">;
-defm MUL: SIMDBinary<mul, fmul, "mul ">;
+defm NE : SIMDConditionInt<"ne", SETNE, 25>;
+defm NE : SIMDConditionFP<"ne", SETUNE, 65>;
} // isCommutable = 1
-defm SUB: SIMDBinary<sub, fsub, "sub ">;
+
+// Less than: lt_s / lt_u / lt
+defm LT_S : SIMDConditionInt<"lt_s", SETLT, 26>;
+defm LT_U : SIMDConditionInt<"lt_u", SETULT, 27>;
+defm LT : SIMDConditionFP<"lt", SETOLT, 66>;
+
+// Greater than: gt_s / gt_u / gt
+defm GT_S : SIMDConditionInt<"gt_s", SETGT, 28>;
+defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 29>;
+defm GT : SIMDConditionFP<"gt", SETOGT, 67>;
+
+// Less than or equal: le_s / le_u / le
+defm LE_S : SIMDConditionInt<"le_s", SETLE, 30>;
+defm LE_U : SIMDConditionInt<"le_u", SETULE, 31>;
+defm LE : SIMDConditionFP<"le", SETOLE, 68>;
+
+// Greater than or equal: ge_s / ge_u / ge
+defm GE_S : SIMDConditionInt<"ge_s", SETGE, 32>;
+defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 33>;
+defm GE : SIMDConditionFP<"ge", SETOGE, 69>;
+
+// Lower float comparisons that don't care about NaN to standard WebAssembly
+// float comparisons. These instructions are generated in the target-independent
+// expansion of unordered comparisons and ordered ne.
+def : Pat<(v4i32 (seteq (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (v4i32 (EQ_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
+def : Pat<(v4i32 (setne (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (v4i32 (NE_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
+def : Pat<(v2i64 (seteq (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (v2i64 (EQ_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+def : Pat<(v2i64 (setne (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (v2i64 (NE_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+
+//===----------------------------------------------------------------------===//
+// Bitwise operations
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name,
+ bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (node (vec_t V128:$lhs), (vec_t V128:$rhs))
+ )],
+ vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
+ simdop>;
+}
+
+multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> {
+ defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>;
+ defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>;
+ defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>;
+ defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>;
+}
+
+multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name,
+ bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (vec_t (node (vec_t V128:$vec)))
+ )],
+ vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
+}
+
+// Bitwise logic: v128.not
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
+defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 76>;
+
+// Bitwise logic: v128.and / v128.or / v128.xor
+let isCommutable = 1 in {
+defm AND : SIMDBitwise<and, "and", 77>;
+defm OR : SIMDBitwise<or, "or", 78>;
+defm XOR : SIMDBitwise<xor, "xor", 79>;
+} // isCommutable = 1
+
+// Bitwise select: v128.bitselect
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+ defm BITSELECT_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (vec_t (int_wasm_bitselect
+ (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c)
+ ))
+ )],
+ "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 80>;
+
+// Bitselect is equivalent to (c & v1) | (~c & v2)
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
+ def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)),
+ (and (vnot V128:$c), (vec_t V128:$v2)))),
+ (!cast<Instruction>("BITSELECT_"#vec_t)
+ V128:$v1, V128:$v2, V128:$c)>;
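The identity in the comment can be checked with a small scalar model; v128.bitselect applies the same bitwise selection across the full 128-bit value. The helper below is only a sketch of that identity.

#include <cstdint>

// Scalar model of the identity: mask bits select from v1, the rest from v2.
constexpr uint64_t bitselect(uint64_t v1, uint64_t v2, uint64_t c) {
  return (c & v1) | (~c & v2);
}
static_assert(bitselect(~0ull, 0ull, 0x00ff00ff00ff00ffull) ==
                  0x00ff00ff00ff00ffull,
              "bits of c pick v1, the rest pick v2");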
+
+//===----------------------------------------------------------------------===//
+// Integer unary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>;
+ defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 17)>;
+ defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 34)>;
+ defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 51)>;
+}
+
+multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name,
+ bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
+ [(set I32:$dst, (i32 (op (vec_t V128:$vec))))],
+ vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
+}
+
+multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> {
+ defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>;
+ defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 17)>;
+ defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 34)>;
+ defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 51)>;
+}
+
+// Integer vector negation
+def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
+
+// Integer negation: neg
+defm NEG : SIMDUnaryInt<ivneg, "neg", 81>;
+
+// Any lane true: any_true
+defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 82>;
+
+// All lanes true: all_true
+defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 83>;
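For reference, a scalar sketch of the reductions these intrinsics express, assuming the usual any_true/all_true semantics (a nonzero-lane test); illustrative only.

#include <cstdint>

// i32x4.any_true: 1 if any lane is nonzero, else 0 (assumed semantics).
int32_t any_true_i32x4(const int32_t lanes[4]) {
  for (int i = 0; i < 4; ++i)
    if (lanes[i] != 0)
      return 1;
  return 0;
}

// i32x4.all_true: 1 if every lane is nonzero, else 0 (assumed semantics).
int32_t all_true_i32x4(const int32_t lanes[4]) {
  for (int i = 0; i < 4; ++i)
    if (lanes[i] == 0)
      return 0;
  return 1;
}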
+
+//===----------------------------------------------------------------------===//
+// Bit shifts
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, dag shift_vec,
+ string name, bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
+ (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (node V128:$vec, (vec_t shift_vec)))],
+ vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>;
+}
+
+multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDShift<v16i8, "i8x16", node, (splat16 I32:$x), name, baseInst>;
+ defm "" : SIMDShift<v8i16, "i16x8", node, (splat8 I32:$x), name,
+ !add(baseInst, 17)>;
+ defm "" : SIMDShift<v4i32, "i32x4", node, (splat4 I32:$x), name,
+ !add(baseInst, 34)>;
+ defm "" : SIMDShift<v2i64, "i64x2", node, (splat2 (i64 (zext I32:$x))),
+ name, !add(baseInst, 51)>;
+}
+
+// Left shift by scalar: shl
+defm SHL : SIMDShiftInt<shl, "shl", 84>;
+
+// Right shift by scalar: shr_s / shr_u
+defm SHR_S : SIMDShiftInt<sra, "shr_s", 85>;
+defm SHR_U : SIMDShiftInt<srl, "shr_u", 86>;
+
+// Truncate i64 shift operands to i32s
+foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in
+def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))),
+ (v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>;
+
+// 2xi64 shifts with constant shift amounts are custom lowered to avoid wrapping
+def wasm_shift_t : SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]
+>;
+def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
+def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
+def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;
+foreach shifts = [[wasm_shl, SHL_v2i64],
+ [wasm_shr_s, SHR_S_v2i64],
+ [wasm_shr_u, SHR_U_v2i64]] in
+def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), I32:$x)),
+ (v2i64 (shifts[1] (v2i64 V128:$vec), I32:$x))>;
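A scalar sketch of the shift handling the patterns above deal with: the SIMD shift instructions take an i32 count, so for i64x2 the DAG-level i64 amount is first wrapped to i32, and the count is assumed to be interpreted modulo the lane width. The function name and the modulo behavior are assumptions made for illustration.

#include <cstdint>

// Model of i64x2.shl with an i32 count (assumed semantics: the count is taken
// modulo the 64-bit lane width, which is why wrapping an i64 DAG-level amount
// down to i32 does not change the result).
void shl_i64x2(uint64_t lanes[2], int32_t count) {
  const unsigned amount = static_cast<unsigned>(count) % 64u;
  for (int i = 0; i < 2; ++i)
    lanes[i] <<= amount;
}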
+
+//===----------------------------------------------------------------------===//
+// Integer binary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>;
+ defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 17)>;
+}
+
+multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
+ defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 34)>;
+}
+
+multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
+ defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 51)>;
+}
+
+// Integer addition: add / add_saturate_s / add_saturate_u
+let isCommutable = 1 in {
+defm ADD : SIMDBinaryInt<add, "add", 87>;
+defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 88>;
+defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 89>;
+} // isCommutable = 1
+
+// Integer subtraction: sub / sub_saturate_s / sub_saturate_u
+defm SUB : SIMDBinaryInt<sub, "sub", 90>;
+defm SUB_SAT_S :
+ SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 91>;
+defm SUB_SAT_U :
+ SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 92>;
+
+// Integer multiplication: mul
+defm MUL : SIMDBinaryIntNoI64x2<mul, "mul", 93>;
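A scalar sketch of the saturating forms for one i8 lane, assuming the usual clamp-to-range behavior of add_saturate_s / add_saturate_u; this is a model, not the instruction definition.

#include <algorithm>
#include <cstdint>

// One-lane model of i8x16.add_saturate_s: the signed result clamps to
// [-128, 127] instead of wrapping (assumed behavior).
int8_t add_saturate_s_i8(int8_t a, int8_t b) {
  int sum = int(a) + int(b);
  return static_cast<int8_t>(std::min(127, std::max(-128, sum)));
}

// One-lane model of i8x16.add_saturate_u: the unsigned result clamps to
// [0, 255] (assumed behavior).
uint8_t add_saturate_u_i8(uint8_t a, uint8_t b) {
  int sum = int(a) + int(b);
  return static_cast<uint8_t>(std::min(255, sum));
}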
+
+//===----------------------------------------------------------------------===//
+// Floating-point unary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>;
+ defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 11)>;
+}
+
+// Absolute value: abs
+defm ABS : SIMDUnaryFP<fabs, "abs", 149>;
+
+// Negation: neg
+defm NEG : SIMDUnaryFP<fneg, "neg", 150>;
+
+// Square root: sqrt
+let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 151>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point binary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
+ defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 11)>;
+}
+
+// Addition: add
+let isCommutable = 1 in
+defm ADD : SIMDBinaryFP<fadd, "add", 154>;
+
+// Subtraction: sub
+defm SUB : SIMDBinaryFP<fsub, "sub", 155>;
+
+// Multiplication: mul
+let isCommutable = 1 in
+defm MUL : SIMDBinaryFP<fmul, "mul", 156>;
+
+// Division: div
+let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+defm DIV : SIMDBinaryFP<fdiv, "div", 157>;
+
+// NaN-propagating minimum: min
+defm MIN : SIMDBinaryFP<fminimum, "min", 158>;
+
+// NaN-propagating maximum: max
+defm MAX : SIMDBinaryFP<fmaximum, "max", 159>;
+
+//===----------------------------------------------------------------------===//
+// Conversions
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op,
+ string name, bits<32> simdop> {
+ defm op#_#vec_t#_#arg_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
+ [(set (vec_t V128:$dst), (vec_t (op (arg_t V128:$vec))))],
+ name#"\t$dst, $vec", name, simdop>;
+}
+
+// Integer to floating point: convert
+defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 175>;
+defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 176>;
+defm "" : SIMDConvert<v2f64, v2i64, sint_to_fp, "f64x2.convert_i64x2_s", 177>;
+defm "" : SIMDConvert<v2f64, v2i64, uint_to_fp, "f64x2.convert_i64x2_u", 178>;
+
+// Floating point to integer with saturation: trunc_sat
+defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 171>;
+defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 172>;
+defm "" : SIMDConvert<v2i64, v2f64, fp_to_sint, "i64x2.trunc_sat_f64x2_s", 173>;
+defm "" : SIMDConvert<v2i64, v2f64, fp_to_uint, "i64x2.trunc_sat_f64x2_u", 174>;
+
+// Lower llvm.wasm.trunc.saturate.* to saturating instructions
+def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
+ (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
+def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
+ (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
+def : Pat<(v2i64 (int_wasm_trunc_saturate_signed (v2f64 V128:$src))),
+ (fp_to_sint_v2i64_v2f64 (v2f64 V128:$src))>;
+def : Pat<(v2i64 (int_wasm_trunc_saturate_unsigned (v2f64 V128:$src))),
+ (fp_to_uint_v2i64_v2f64 (v2f64 V128:$src))>;
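A scalar sketch of the saturating conversion for one f32 lane, under the assumption that trunc_sat clamps out-of-range values to the integer limits and turns NaN into 0 rather than trapping.

#include <cmath>
#include <cstdint>
#include <limits>

// Model of i32x4.trunc_sat_f32x4_s on a single lane: NaN becomes 0 and values
// outside the i32 range clamp to INT32_MIN / INT32_MAX (assumed semantics).
int32_t trunc_sat_f32_s(float x) {
  if (std::isnan(x))
    return 0;
  if (x <= static_cast<float>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();
  if (x >= static_cast<float>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(x);
}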
+
+// Bitcasts are nops
+// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
+foreach t1 = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+foreach t2 = !foldl(
+ []<ValueType>, [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ acc, cur, !if(!eq(!cast<string>(t1), !cast<string>(cur)),
+ acc, !listconcat(acc, [cur])
+ )
+) in
+def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index e42dcbc0a8ac..ad838dfb574a 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -31,6 +31,7 @@ class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
+ bool removeUnnecessaryUnreachables(MachineFunction &MF);
bool replaceFuncletReturns(MachineFunction &MF);
bool hoistCatches(MachineFunction &MF);
bool addCatchAlls(MachineFunction &MF);
@@ -47,7 +48,7 @@ public:
char WebAssemblyLateEHPrepare::ID = 0;
INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE,
- "WebAssembly Exception Preparation", false, false)
+ "WebAssembly Late Exception Preparation", false, false)
FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
return new WebAssemblyLateEHPrepare();
@@ -59,7 +60,7 @@ FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
// possible search paths should be the same.
// Returns nullptr in case it does not find any EH pad in the search, or finds
// multiple different EH pads.
-MachineBasicBlock *GetMatchingEHPad(MachineInstr *MI) {
+static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) {
MachineFunction *MF = MI->getParent()->getParent();
SmallVector<MachineBasicBlock *, 2> WL;
SmallPtrSet<MachineBasicBlock *, 2> Visited;
@@ -83,29 +84,35 @@ MachineBasicBlock *GetMatchingEHPad(MachineInstr *MI) {
return EHPad;
}
-// Erases the given BB and all its children from the function. If other BBs have
-// this BB as a successor, the successor relationships will be deleted as well.
-static void EraseBBAndChildren(MachineBasicBlock *MBB) {
- SmallVector<MachineBasicBlock *, 8> WL;
- WL.push_back(MBB);
+// Erase the specified BBs if they no longer have any predecessors, and also
+// erase any of their children that become dead as a result.
+template <typename Container>
+static void eraseDeadBBsAndChildren(const Container &MBBs) {
+ SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end());
while (!WL.empty()) {
MachineBasicBlock *MBB = WL.pop_back_val();
- for (auto *Pred : MBB->predecessors())
- Pred->removeSuccessor(MBB);
- for (auto *Succ : MBB->successors()) {
- WL.push_back(Succ);
+ if (!MBB->pred_empty())
+ continue;
+ SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+ WL.append(MBB->succ_begin(), MBB->succ_end());
+ for (auto *Succ : Succs)
MBB->removeSuccessor(Succ);
- }
MBB->eraseFromParent();
}
}
bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** Late EH Prepare **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() !=
ExceptionHandling::Wasm)
return false;
bool Changed = false;
+ Changed |= removeUnnecessaryUnreachables(MF);
Changed |= addRethrows(MF);
if (!MF.getFunction().hasPersonalityFn())
return Changed;
@@ -118,6 +125,31 @@ bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
+ MachineFunction &MF) {
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (!WebAssembly::isThrow(MI))
+ continue;
+ Changed = true;
+
+ // The instruction after the throw should be an unreachable or a branch to
+ // another BB that should eventually lead to an unreachable. Delete it
+ // because throw itself is a terminator, and also delete successors if
+ // any.
+ MBB.erase(std::next(MachineBasicBlock::iterator(MI)), MBB.end());
+ SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
+ MBB.succ_end());
+ for (auto *Succ : Succs)
+ MBB.removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(Succs);
+ }
+ }
+
+ return Changed;
+}
+
bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
bool Changed = false;
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -179,7 +211,7 @@ bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
Catches.push_back(&MI);
for (auto *Catch : Catches) {
- MachineBasicBlock *EHPad = GetMatchingEHPad(Catch);
+ MachineBasicBlock *EHPad = getMatchingEHPad(Catch);
assert(EHPad && "No matching EH pad for catch");
if (EHPad->begin() == Catch)
continue;
@@ -238,14 +270,18 @@ bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
TII.get(WebAssembly::RETHROW_TO_CALLER));
- // Becasue __cxa_rethrow does not return, the instruction after the
+ // Because __cxa_rethrow does not return, the instruction after the
// rethrow should be an unreachable or a branch to another BB that should
// eventually lead to an unreachable. Delete it because rethrow itself is
// a terminator, and also delete non-EH pad successors if any.
MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end());
+ SmallVector<MachineBasicBlock *, 8> NonPadSuccessors;
for (auto *Succ : MBB.successors())
if (!Succ->isEHPad())
- EraseBBAndChildren(Succ);
+ NonPadSuccessors.push_back(Succ);
+ for (auto *Succ : NonPadSuccessors)
+ MBB.removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(NonPadSuccessors);
}
return Changed;
}
@@ -255,7 +291,7 @@ bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
// %exn = catch 0
// call @__clang_call_terminate(%exn)
// unreachable
-// (There can be set_local and get_locals before the call if we didn't run
+// (There can be local.set and local.gets before the call if we didn't run
// RegStackify)
// But code transformations can change or add more control flow, so the call to
// __clang_call_terminate() function may not be in the original EH pad anymore.
@@ -277,7 +313,7 @@ bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
bool Changed = false;
for (auto *Call : ClangCallTerminateCalls) {
- MachineBasicBlock *EHPad = GetMatchingEHPad(Call);
+ MachineBasicBlock *EHPad = getMatchingEHPad(Call);
assert(EHPad && "No matching EH pad for catch");
// If it is already the form we want, skip it
@@ -294,7 +330,7 @@ bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
// This runs after hoistCatches(), so catch instruction should be at the top
assert(WebAssembly::isCatch(*Catch));
// Takes the result register of the catch instruction as argument. There may
- // have been some other set_local/get_locals in between, but at this point
+ // have been some other local.set/local.gets in between, but at this point
// we don't care.
Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
@@ -302,8 +338,11 @@ bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
TII.get(WebAssembly::UNREACHABLE));
EHPad->erase(InsertPos, EHPad->end());
- for (auto *Succ : EHPad->successors())
- EraseBBAndChildren(Succ);
+ SmallVector<MachineBasicBlock *, 8> Succs(EHPad->succ_begin(),
+ EHPad->succ_end());
+ for (auto *Succ : Succs)
+ EHPad->removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(Succs);
}
return Changed;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 5fb97e38939a..c9a3527d3fbd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -78,30 +78,102 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *Def = MRI.getVRegDef(Cond);
switch (Def->getOpcode()) {
using namespace WebAssembly;
- case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break;
- case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break;
- case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break;
- case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break;
- case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break;
- case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break;
- case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break;
- case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break;
- case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break;
- case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break;
- case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break;
- case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break;
- case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break;
- case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break;
- case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break;
- case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break;
- case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break;
- case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break;
- case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break;
- case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break;
- case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break;
- case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break;
- case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break;
- case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break;
+ case EQ_I32:
+ Def->setDesc(TII.get(NE_I32));
+ Inverted = true;
+ break;
+ case NE_I32:
+ Def->setDesc(TII.get(EQ_I32));
+ Inverted = true;
+ break;
+ case GT_S_I32:
+ Def->setDesc(TII.get(LE_S_I32));
+ Inverted = true;
+ break;
+ case GE_S_I32:
+ Def->setDesc(TII.get(LT_S_I32));
+ Inverted = true;
+ break;
+ case LT_S_I32:
+ Def->setDesc(TII.get(GE_S_I32));
+ Inverted = true;
+ break;
+ case LE_S_I32:
+ Def->setDesc(TII.get(GT_S_I32));
+ Inverted = true;
+ break;
+ case GT_U_I32:
+ Def->setDesc(TII.get(LE_U_I32));
+ Inverted = true;
+ break;
+ case GE_U_I32:
+ Def->setDesc(TII.get(LT_U_I32));
+ Inverted = true;
+ break;
+ case LT_U_I32:
+ Def->setDesc(TII.get(GE_U_I32));
+ Inverted = true;
+ break;
+ case LE_U_I32:
+ Def->setDesc(TII.get(GT_U_I32));
+ Inverted = true;
+ break;
+ case EQ_I64:
+ Def->setDesc(TII.get(NE_I64));
+ Inverted = true;
+ break;
+ case NE_I64:
+ Def->setDesc(TII.get(EQ_I64));
+ Inverted = true;
+ break;
+ case GT_S_I64:
+ Def->setDesc(TII.get(LE_S_I64));
+ Inverted = true;
+ break;
+ case GE_S_I64:
+ Def->setDesc(TII.get(LT_S_I64));
+ Inverted = true;
+ break;
+ case LT_S_I64:
+ Def->setDesc(TII.get(GE_S_I64));
+ Inverted = true;
+ break;
+ case LE_S_I64:
+ Def->setDesc(TII.get(GT_S_I64));
+ Inverted = true;
+ break;
+ case GT_U_I64:
+ Def->setDesc(TII.get(LE_U_I64));
+ Inverted = true;
+ break;
+ case GE_U_I64:
+ Def->setDesc(TII.get(LT_U_I64));
+ Inverted = true;
+ break;
+ case LT_U_I64:
+ Def->setDesc(TII.get(GE_U_I64));
+ Inverted = true;
+ break;
+ case LE_U_I64:
+ Def->setDesc(TII.get(GT_U_I64));
+ Inverted = true;
+ break;
+ case EQ_F32:
+ Def->setDesc(TII.get(NE_F32));
+ Inverted = true;
+ break;
+ case NE_F32:
+ Def->setDesc(TII.get(EQ_F32));
+ Inverted = true;
+ break;
+ case EQ_F64:
+ Def->setDesc(TII.get(NE_F64));
+ Inverted = true;
+ break;
+ case NE_F64:
+ Def->setDesc(TII.get(EQ_F64));
+ Inverted = true;
+ break;
case EQZ_I32: {
// Invert an eqz by replacing it with its operand.
Cond = Def->getOperand(1).getReg();
@@ -109,7 +181,8 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
Inverted = true;
break;
}
- default: break;
+ default:
+ break;
}
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index e9cb7c10113b..0491f71cea7f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -50,24 +50,21 @@
///
/// In detail, this pass does following things:
///
-/// 1) Create three global variables: __THREW__, __threwValue, and __tempRet0.
-/// __tempRet0 will be set within __cxa_find_matching_catch() function in
-/// JS library, and __THREW__ and __threwValue will be set in invoke wrappers
+/// 1) Assumes the existence of the global variables __THREW__ and __threwValue,
+///    which will be set in invoke wrappers
/// in JS glue code. For what invoke wrappers are, refer to 3). These
/// variables are used for both exceptions and setjmp/longjmps.
/// __THREW__ indicates whether an exception or a longjmp occurred or not. 0
/// means nothing occurred, 1 means an exception occurred, and other numbers
/// mean a longjmp occurred. In the case of longjmp, __threwValue variable
/// indicates the corresponding setjmp buffer the longjmp corresponds to.
-/// In exception handling, __tempRet0 indicates the type of an exception
-/// caught, and in setjmp/longjmp, it means the second argument to longjmp
-/// function.
///
/// * Exception handling
///
-/// 2) Create setThrew and setTempRet0 functions.
-/// The global variables created in 1) will exist in wasm address space,
-/// but their values should be set in JS code, so we provide these functions
+/// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions
+/// at link time.
+/// The global variables in 1) will exist in wasm address space,
+///    but their values should be set in JS code, so these functions serve
/// as interfaces to JS glue code. These functions are equivalent to the
/// following JS functions, which actually exist in asm.js version of JS
/// library.
@@ -78,10 +75,12 @@
/// __threwValue = value;
/// }
/// }
+///
+/// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
///
-/// function setTempRet0(value) {
-/// __tempRet0 = value;
-/// }
+/// In exception handling, getTempRet0 indicates the type of the exception
+/// caught, and in setjmp/longjmp handling it holds the second argument to the
+/// longjmp call.
///
/// 3) Lower
/// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad
@@ -118,11 +117,10 @@
/// ... use %val ...
/// into
/// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...)
-/// %val = {%fmc, __tempRet0}
+/// %val = {%fmc, getTempRet0()}
/// ... use %val ...
/// Here N is a number calculated based on the number of clauses.
-/// Global variable __tempRet0 is set within __cxa_find_matching_catch() in
-/// JS glue code.
+/// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
///
/// 5) Lower
/// resume {%a, %b}
@@ -138,7 +136,17 @@
///
/// * Setjmp / Longjmp handling
///
-/// 7) In the function entry that calls setjmp, initialize setjmpTable and
+/// In case calls to longjmp() exist
+///
+/// 1) Lower
+/// longjmp(buf, value)
+/// into
+/// emscripten_longjmp_jmpbuf(buf, value)
+/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later.
+///
+/// In case calls to setjmp() exist
+///
+/// 2) In the function entry that calls setjmp, initialize setjmpTable and
///    setjmpTableSize as follows:
/// setjmpTableSize = 4;
/// setjmpTable = (int *) malloc(40);
@@ -146,27 +154,22 @@
/// setjmpTable and setjmpTableSize are used in saveSetjmp() function in JS
/// code.
///
-/// 8) Lower
+/// 3) Lower
/// setjmp(buf)
/// into
/// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
-/// setjmpTableSize = __tempRet0;
+/// setjmpTableSize = getTempRet0();
/// For each dynamic setjmp call, setjmpTable stores its ID (a number which
/// is incrementally assigned from 0) and its label (a unique number that
/// represents each callsite of setjmp). When we need more entries in
/// setjmpTable, it is reallocated in saveSetjmp() in JS code and it will
/// return the new table address, and assign the new table size in
-/// __tempRet0. saveSetjmp also stores the setjmp's ID into the buffer buf.
-/// A BB with setjmp is split into two after setjmp call in order to make the
-/// post-setjmp BB the possible destination of longjmp BB.
+/// setTempRet0(). saveSetjmp also stores the setjmp's ID into the buffer
+/// buf. A BB with setjmp is split into two after setjmp call in order to
+/// make the post-setjmp BB the possible destination of longjmp BB.
///
-/// 9) Lower
-/// longjmp(buf, value)
-/// into
-/// emscripten_longjmp_jmpbuf(buf, value)
-/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later.
///
-/// 10) Lower every call that might longjmp into
+/// 4) Lower every call that might longjmp into
/// __THREW__ = 0;
/// call @__invoke_SIG(func, arg1, arg2)
/// %__THREW__.val = __THREW__;
@@ -176,32 +179,32 @@
/// setjmpTableSize);
/// if (%label == 0)
/// emscripten_longjmp(%__THREW__.val, __threwValue);
-/// __tempRet0 = __threwValue;
+/// setTempRet0(__threwValue);
/// } else {
/// %label = -1;
/// }
-/// longjmp_result = __tempRet0;
+/// longjmp_result = getTempRet0();
/// switch label {
/// label 1: goto post-setjmp BB 1
/// label 2: goto post-setjmp BB 2
/// ...
///     default: goto split next BB
/// }
-/// testSetjmp examines setjmpTable to see if there is a matching setjmp
-/// call. After calling an invoke wrapper, if a longjmp occurred, __THREW__
-/// will be the address of matching jmp_buf buffer and __threwValue be the
-/// second argument to longjmp. mem[__THREW__.val] is a setjmp ID that is
-/// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to
-/// each setjmp callsite. Label 0 means this longjmp buffer does not
-/// correspond to one of the setjmp callsites in this function, so in this
-/// case we just chain the longjmp to the caller. (Here we call
-/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf.
-/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while
-/// emscripten_longjmp takes an int. Both of them will eventually be lowered
-/// to emscripten_longjmp in s2wasm, but here we need two signatures - we
-/// can't translate an int value to a jmp_buf.)
-/// Label -1 means no longjmp occurred. Otherwise we jump to the right
-/// post-setjmp BB based on the label.
+/// testSetjmp examines setjmpTable to see if there is a matching setjmp
+/// call. After calling an invoke wrapper, if a longjmp occurred, __THREW__
+/// will be the address of matching jmp_buf buffer and __threwValue be the
+/// second argument to longjmp. mem[__THREW__.val] is a setjmp ID that is
+/// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to
+/// each setjmp callsite. Label 0 means this longjmp buffer does not
+/// correspond to one of the setjmp callsites in this function, so in this
+/// case we just chain the longjmp to the caller. (Here we call
+/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf.
+/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while
+/// emscripten_longjmp takes an int. Both of them will eventually be lowered
+/// to emscripten_longjmp in s2wasm, but here we need two signatures - we
+/// can't translate an int value to a jmp_buf.)
+/// Label -1 means no longjmp occurred. Otherwise we jump to the right
+/// post-setjmp BB based on the label.
///
///===----------------------------------------------------------------------===//
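For orientation, a minimal C++ example of the kind of input this pass rewrites: a setjmp whose buffer may be the target of a longjmp from a callee. The comments refer to the helpers named in the description above; the example itself is illustrative input and is not produced by the pass.

#include <csetjmp>
#include <cstdio>

static std::jmp_buf Buf;

// A callee that may transfer control back to the setjmp point; the pass
// rewrites this longjmp call via emscripten_longjmp_jmpbuf.
static void mayLongjmp(bool Fail) {
  if (Fail)
    std::longjmp(Buf, 1);
}

int main() {
  // setjmp is lowered to saveSetjmp, with the table size read back via
  // getTempRet0, as described above.
  if (setjmp(Buf) == 0) {
    mayLongjmp(true); // a call that might longjmp: wrapped in an invoke wrapper
    std::puts("not reached");
  } else {
    std::puts("returned via longjmp");
  }
  return 0;
}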
@@ -239,7 +242,8 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
GlobalVariable *ThrewGV;
GlobalVariable *ThrewValueGV;
- GlobalVariable *TempRet0GV;
+ Function *GetTempRet0Func;
+ Function *SetTempRet0Func;
Function *ResumeF;
Function *EHTypeIDF;
Function *EmLongjmpF;
@@ -272,9 +276,6 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); }
bool canLongjmp(Module &M, const Value *Callee) const;
- void createSetThrewFunction(Module &M);
- void createSetTempRet0Function(Module &M);
-
void rebuildSSA(Function &F);
public:
@@ -282,9 +283,10 @@ public:
WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true)
: ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj),
- ThrewGV(nullptr), ThrewValueGV(nullptr), TempRet0GV(nullptr),
- ResumeF(nullptr), EHTypeIDF(nullptr), EmLongjmpF(nullptr),
- EmLongjmpJmpbufF(nullptr), SaveSetjmpF(nullptr), TestSetjmpF(nullptr) {
+ ThrewGV(nullptr), ThrewValueGV(nullptr), GetTempRet0Func(nullptr),
+ SetTempRet0Func(nullptr), ResumeF(nullptr), EHTypeIDF(nullptr),
+ EmLongjmpF(nullptr), EmLongjmpJmpbufF(nullptr), SaveSetjmpF(nullptr),
+ TestSetjmpF(nullptr) {
EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end());
}
bool runOnModule(Module &M) override;
@@ -333,13 +335,15 @@ static bool canThrow(const Value *V) {
return true;
}
-static GlobalVariable *createGlobalVariableI32(Module &M, IRBuilder<> &IRB,
- const char *Name) {
+// Get a global variable with the given name. If it doesn't exist, declare it,
+// which generates an import and assumes that it will exist at link time.
+static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB,
+ const char *Name) {
if (M.getNamedGlobal(Name))
report_fatal_error(Twine("variable name is reserved: ") + Name);
return new GlobalVariable(M, IRB.getInt32Ty(), false,
- GlobalValue::WeakODRLinkage, IRB.getInt32(0), Name);
+ GlobalValue::ExternalLinkage, nullptr, Name);
}
// Simple function name mangler.
@@ -508,7 +512,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
Function *ThrowF = M.getFunction("__cxa_throw");
Function *TerminateF = M.getFunction("__clang_call_terminate");
if (Callee == BeginCatchF || Callee == EndCatchF ||
- Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF)
+ Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF ||
+ Callee == GetTempRet0Func || Callee == SetTempRet0Func)
return false;
// Otherwise we don't know
@@ -521,11 +526,11 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
// %label = _testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
// if (%label == 0)
// emscripten_longjmp(%__THREW__.val, threwValue);
-// __tempRet0 = threwValue;
+// setTempRet0(threwValue);
// } else {
// %label = -1;
// }
-// %longjmp_result = __tempRet0;
+// %longjmp_result = getTempRet0();
//
// As output parameters. returns %label, %longjmp_result, and the BB the last
// instruction (%longjmp_result = ...) is in.
@@ -569,15 +574,15 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue});
IRB.CreateUnreachable();
- // __tempRet0 = threwValue;
+ // setTempRet0(threwValue);
IRB.SetInsertPoint(EndBB2);
- IRB.CreateStore(ThrewValue, TempRet0GV);
+ IRB.CreateCall(SetTempRet0Func, ThrewValue);
IRB.CreateBr(EndBB1);
IRB.SetInsertPoint(ElseBB1);
IRB.CreateBr(EndBB1);
- // longjmp_result = __tempRet0;
+ // longjmp_result = getTempRet0();
IRB.SetInsertPoint(EndBB1);
PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label");
LabelPHI->addIncoming(ThenLabel, EndBB2);
@@ -587,68 +592,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
// Output parameter assignment
Label = LabelPHI;
EndBB = EndBB1;
- LongjmpResult = IRB.CreateLoad(TempRet0GV, "longjmp_result");
-}
-
-// Create setThrew function
-// function setThrew(threw, value) {
-// if (__THREW__ == 0) {
-// __THREW__ = threw;
-// __threwValue = value;
-// }
-// }
-void WebAssemblyLowerEmscriptenEHSjLj::createSetThrewFunction(Module &M) {
- LLVMContext &C = M.getContext();
- IRBuilder<> IRB(C);
-
- if (M.getNamedGlobal("setThrew"))
- report_fatal_error("setThrew already exists");
-
- Type *Params[] = {IRB.getInt32Ty(), IRB.getInt32Ty()};
- FunctionType *FTy = FunctionType::get(IRB.getVoidTy(), Params, false);
- Function *F =
- Function::Create(FTy, GlobalValue::WeakODRLinkage, "setThrew", &M);
- Argument *Arg1 = &*(F->arg_begin());
- Argument *Arg2 = &*std::next(F->arg_begin());
- Arg1->setName("threw");
- Arg2->setName("value");
- BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
- BasicBlock *ThenBB = BasicBlock::Create(C, "if.then", F);
- BasicBlock *EndBB = BasicBlock::Create(C, "if.end", F);
-
- IRB.SetInsertPoint(EntryBB);
- Value *Threw = IRB.CreateLoad(ThrewGV, ThrewGV->getName() + ".val");
- Value *Cmp = IRB.CreateICmpEQ(Threw, IRB.getInt32(0), "cmp");
- IRB.CreateCondBr(Cmp, ThenBB, EndBB);
-
- IRB.SetInsertPoint(ThenBB);
- IRB.CreateStore(Arg1, ThrewGV);
- IRB.CreateStore(Arg2, ThrewValueGV);
- IRB.CreateBr(EndBB);
-
- IRB.SetInsertPoint(EndBB);
- IRB.CreateRetVoid();
-}
-
-// Create setTempRet0 function
-// function setTempRet0(value) {
-// __tempRet0 = value;
-// }
-void WebAssemblyLowerEmscriptenEHSjLj::createSetTempRet0Function(Module &M) {
- LLVMContext &C = M.getContext();
- IRBuilder<> IRB(C);
-
- if (M.getNamedGlobal("setTempRet0"))
- report_fatal_error("setTempRet0 already exists");
- Type *Params[] = {IRB.getInt32Ty()};
- FunctionType *FTy = FunctionType::get(IRB.getVoidTy(), Params, false);
- Function *F =
- Function::Create(FTy, GlobalValue::WeakODRLinkage, "setTempRet0", &M);
- F->arg_begin()->setName("value");
- BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
- IRB.SetInsertPoint(EntryBB);
- IRB.CreateStore(&*F->arg_begin(), TempRet0GV);
- IRB.CreateRetVoid();
+ LongjmpResult = IRB.CreateCall(GetTempRet0Func, None, "longjmp_result");
}
void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
@@ -679,6 +623,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
}
bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n");
+
LLVMContext &C = M.getContext();
IRBuilder<> IRB(C);
@@ -688,11 +634,19 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty();
bool DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed);
- // Create global variables __THREW__, threwValue, and __tempRet0, which are
- // used in common for both exception handling and setjmp/longjmp handling
- ThrewGV = createGlobalVariableI32(M, IRB, "__THREW__");
- ThrewValueGV = createGlobalVariableI32(M, IRB, "__threwValue");
- TempRet0GV = createGlobalVariableI32(M, IRB, "__tempRet0");
+  // Declare (or get) the global variables __THREW__ and __threwValue and the
+  // getTempRet0/setTempRet0 functions, which are used in common for both
+  // exception handling and setjmp/longjmp handling.
+ ThrewGV = getGlobalVariableI32(M, IRB, "__THREW__");
+ ThrewValueGV = getGlobalVariableI32(M, IRB, "__threwValue");
+ GetTempRet0Func =
+ Function::Create(FunctionType::get(IRB.getInt32Ty(), false),
+ GlobalValue::ExternalLinkage, "getTempRet0", &M);
+ SetTempRet0Func = Function::Create(
+ FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false),
+ GlobalValue::ExternalLinkage, "setTempRet0", &M);
+ GetTempRet0Func->setDoesNotThrow();
+ SetTempRet0Func->setDoesNotThrow();
bool Changed = false;
@@ -721,22 +675,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
if (DoSjLj) {
Changed = true; // We have setjmp or longjmp somewhere
- // Register saveSetjmp function
- FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
- SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0),
- IRB.getInt32Ty(), Type::getInt32PtrTy(C),
- IRB.getInt32Ty()};
- FunctionType *FTy =
- FunctionType::get(Type::getInt32PtrTy(C), Params, false);
- SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
- SaveSetjmpFName, &M);
-
- // Register testSetjmp function
- Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()};
- FTy = FunctionType::get(IRB.getInt32Ty(), Params, false);
- TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
- TestSetjmpFName, &M);
-
if (LongjmpF) {
// Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is
// defined in JS code
@@ -746,27 +684,43 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF);
}
- FTy = FunctionType::get(IRB.getVoidTy(),
- {IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
- EmLongjmpF =
- Function::Create(FTy, GlobalValue::ExternalLinkage, EmLongjmpFName, &M);
-
- // Only traverse functions that uses setjmp in order not to insert
- // unnecessary prep / cleanup code in every function
- SmallPtrSet<Function *, 8> SetjmpUsers;
- for (User *U : SetjmpF->users()) {
- auto *UI = cast<Instruction>(U);
- SetjmpUsers.insert(UI->getFunction());
+
+ if (SetjmpF) {
+ // Register saveSetjmp function
+ FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
+ SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0),
+ IRB.getInt32Ty(), Type::getInt32PtrTy(C),
+ IRB.getInt32Ty()};
+ FunctionType *FTy =
+ FunctionType::get(Type::getInt32PtrTy(C), Params, false);
+ SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ SaveSetjmpFName, &M);
+
+ // Register testSetjmp function
+ Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()};
+ FTy = FunctionType::get(IRB.getInt32Ty(), Params, false);
+ TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ TestSetjmpFName, &M);
+
+ FTy = FunctionType::get(IRB.getVoidTy(),
+ {IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
+ EmLongjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ EmLongjmpFName, &M);
+
+      // Only traverse functions that use setjmp in order not to insert
+ // unnecessary prep / cleanup code in every function
+ SmallPtrSet<Function *, 8> SetjmpUsers;
+ for (User *U : SetjmpF->users()) {
+ auto *UI = cast<Instruction>(U);
+ SetjmpUsers.insert(UI->getFunction());
+ }
+ for (Function *F : SetjmpUsers)
+ runSjLjOnFunction(*F);
}
- for (Function *F : SetjmpUsers)
- runSjLjOnFunction(*F);
}
if (!Changed) {
// Delete unused global variables and functions
- ThrewGV->eraseFromParent();
- ThrewValueGV->eraseFromParent();
- TempRet0GV->eraseFromParent();
if (ResumeF)
ResumeF->eraseFromParent();
if (EHTypeIDF)
@@ -780,12 +734,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
return false;
}
- // If we have made any changes while doing exception handling or
- // setjmp/longjmp handling, we have to create these functions for JavaScript
- // to call.
- createSetThrewFunction(M);
- createSetTempRet0Function(M);
-
return true;
}
@@ -908,8 +856,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc");
Value *Undef = UndefValue::get(LPI->getType());
Value *Pair0 = IRB.CreateInsertValue(Undef, FMCI, 0, "pair0");
- Value *TempRet0 =
- IRB.CreateLoad(TempRet0GV, TempRet0GV->getName() + ".val");
+ Value *TempRet0 = IRB.CreateCall(GetTempRet0Func, None, "tempret0");
Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1");
LPI->replaceAllUsesWith(Pair1);
@@ -990,7 +937,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Instruction *NewSetjmpTable =
IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
Instruction *NewSetjmpTableSize =
- IRB.CreateLoad(TempRet0GV, "setjmpTableSize");
+ IRB.CreateCall(GetTempRet0Func, None, "setjmpTableSize");
SetjmpTableInsts.push_back(NewSetjmpTable);
SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
ToErase.push_back(CI);
@@ -1098,7 +1045,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// Free setjmpTable buffer before each return instruction
for (BasicBlock &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (isa<ReturnInst>(TI))
CallInst::CreateFree(SetjmpTable, TI);
}
@@ -1112,7 +1059,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// ...
// somebb:
// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
- // setjmpTableSize = __tempRet0;
+ // setjmpTableSize = getTempRet0();
// So we need to make sure the SSA for these variables is valid so that every
// saveSetjmp and testSetjmp calls have the correct arguments.
SSAUpdater SetjmpTableSSA;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
index ee708d637b25..84c877cb8d02 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -18,15 +18,15 @@
//===----------------------------------------------------------------------===//
#include "WebAssembly.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Pass.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-lower-global-dtors"
@@ -59,6 +59,8 @@ ModulePass *llvm::createWebAssemblyLowerGlobalDtors() {
}
bool LowerGlobalDtors::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** Lower Global Destructors **********\n");
+
GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors");
if (!GV)
return false;
@@ -77,18 +79,20 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
// Collect the contents of @llvm.global_dtors, collated by priority and
// associated symbol.
- std::map<uint16_t, MapVector<Constant *, std::vector<Constant *> > > DtorFuncs;
+ std::map<uint16_t, MapVector<Constant *, std::vector<Constant *>>> DtorFuncs;
for (Value *O : InitList->operands()) {
ConstantStruct *CS = dyn_cast<ConstantStruct>(O);
- if (!CS) continue; // Malformed.
+ if (!CS)
+ continue; // Malformed.
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
- if (!Priority) continue; // Malformed.
+ if (!Priority)
+ continue; // Malformed.
uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX);
Constant *DtorFunc = CS->getOperand(1);
if (DtorFunc->isNullValue())
- break; // Found a null terminator, skip the rest.
+ break; // Found a null terminator, skip the rest.
Constant *Associated = CS->getOperand(2);
Associated = cast<Constant>(Associated->stripPointerCastsNoFollowAliases());
@@ -101,31 +105,23 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
// extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
LLVMContext &C = M.getContext();
PointerType *VoidStar = Type::getInt8PtrTy(C);
- Type *AtExitFuncArgs[] = { VoidStar };
- FunctionType *AtExitFuncTy = FunctionType::get(
- Type::getVoidTy(C),
- AtExitFuncArgs,
- /*isVarArg=*/false);
-
- Type *AtExitArgs[] = {
- PointerType::get(AtExitFuncTy, 0),
- VoidStar,
- VoidStar
- };
- FunctionType *AtExitTy = FunctionType::get(
- Type::getInt32Ty(C),
- AtExitArgs,
- /*isVarArg=*/false);
+ Type *AtExitFuncArgs[] = {VoidStar};
+ FunctionType *AtExitFuncTy =
+ FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs,
+ /*isVarArg=*/false);
+
+ Type *AtExitArgs[] = {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar};
+ FunctionType *AtExitTy = FunctionType::get(Type::getInt32Ty(C), AtExitArgs,
+ /*isVarArg=*/false);
Constant *AtExit = M.getOrInsertFunction("__cxa_atexit", AtExitTy);
  // Declare __dso_handle.
Constant *DsoHandle = M.getNamedValue("__dso_handle");
if (!DsoHandle) {
Type *DsoHandleTy = Type::getInt8Ty(C);
- GlobalVariable *Handle =
- new GlobalVariable(M, DsoHandleTy, /*isConstant=*/true,
- GlobalVariable::ExternalWeakLinkage,
- nullptr, "__dso_handle");
+ GlobalVariable *Handle = new GlobalVariable(
+ M, DsoHandleTy, /*isConstant=*/true,
+ GlobalVariable::ExternalWeakLinkage, nullptr, "__dso_handle");
Handle->setVisibility(GlobalVariable::HiddenVisibility);
DsoHandle = Handle;
}
@@ -139,13 +135,13 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
Constant *Associated = AssociatedAndMore.first;
Function *CallDtors = Function::Create(
- AtExitFuncTy, Function::PrivateLinkage,
- "call_dtors" +
- (Priority != UINT16_MAX ?
- (Twine(".") + Twine(Priority)) : Twine()) +
- (!Associated->isNullValue() ?
- (Twine(".") + Associated->getName()) : Twine()),
- &M);
+ AtExitFuncTy, Function::PrivateLinkage,
+ "call_dtors" +
+ (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority))
+ : Twine()) +
+ (!Associated->isNullValue() ? (Twine(".") + Associated->getName())
+ : Twine()),
+ &M);
BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors);
for (auto Dtor : AssociatedAndMore.second)
@@ -155,29 +151,29 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C),
/*isVarArg=*/false);
Function *RegisterCallDtors = Function::Create(
- VoidVoid, Function::PrivateLinkage,
- "register_call_dtors" +
- (Priority != UINT16_MAX ?
- (Twine(".") + Twine(Priority)) : Twine()) +
- (!Associated->isNullValue() ?
- (Twine(".") + Associated->getName()) : Twine()),
- &M);
+ VoidVoid, Function::PrivateLinkage,
+ "register_call_dtors" +
+ (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority))
+ : Twine()) +
+ (!Associated->isNullValue() ? (Twine(".") + Associated->getName())
+ : Twine()),
+ &M);
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", RegisterCallDtors);
BasicBlock *FailBB = BasicBlock::Create(C, "fail", RegisterCallDtors);
BasicBlock *RetBB = BasicBlock::Create(C, "return", RegisterCallDtors);
Value *Null = ConstantPointerNull::get(VoidStar);
- Value *Args[] = { CallDtors, Null, DsoHandle };
+ Value *Args[] = {CallDtors, Null, DsoHandle};
Value *Res = CallInst::Create(AtExit, Args, "call", EntryBB);
Value *Cmp = new ICmpInst(*EntryBB, ICmpInst::ICMP_NE, Res,
Constant::getNullValue(Res->getType()));
BranchInst::Create(FailBB, RetBB, Cmp, EntryBB);
// If `__cxa_atexit` hits out-of-memory, trap, so that we don't misbehave.
- // This should be very rare, because if the process is running out of memory
- // before main has even started, something is wrong.
- CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap),
- "", FailBB);
+ // This should be very rare, because if the process is running out of
+ // memory before main has even started, something is wrong.
+ CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap), "",
+ FailBB);
new UnreachableInst(C, FailBB);
ReturnInst::Create(C, RetBB);
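
For orientation, the functions this pass synthesizes for one (priority, associated-symbol) bucket correspond roughly to the following source-level C++ sketch. The null/__dso_handle arguments and the trap-on-failure path mirror the hunks above; the concrete names are illustrative only.

// Rough source-level view of one bucket produced by LowerGlobalDtors.
// call_dtors_65535 / register_call_dtors_65535 are hypothetical names.
extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
extern "C" char __dso_handle;

static void call_dtors_65535(void *) {
  // ...run each destructor collected for this priority bucket, in order...
}

static void register_call_dtors_65535() {
  // Register the thunk; if __cxa_atexit fails (e.g. out of memory before
  // main has even started), trap rather than misbehave silently.
  if (__cxa_atexit(&call_dtors_65535, nullptr, &__dso_handle) != 0)
    __builtin_trap();
}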
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index d85db14fc679..fa862fbaa634 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -30,6 +30,21 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+// Defines llvm::WebAssembly::getStackOpcode to convert register instructions to
+// stack instructions
+#define GET_INSTRMAP_INFO 1
+#include "WebAssemblyGenInstrInfo.inc"
+
+// This disables the removal of registers when lowering into MC, as required
+// by some current tests.
+static cl::opt<bool>
+ WasmKeepRegisters("wasm-keep-registers", cl::Hidden,
+ cl::desc("WebAssembly: output stack registers in"
+ " instruction output for test purposes only."),
+ cl::init(false));
+
+static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI);
+
MCSymbol *
WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const GlobalValue *Global = MO.getGlobal();
@@ -40,35 +55,13 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const TargetMachine &TM = MF.getTarget();
const Function &CurrentFunc = MF.getFunction();
- SmallVector<wasm::ValType, 4> Returns;
- SmallVector<wasm::ValType, 4> Params;
-
- wasm::ValType iPTR =
- MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() ?
- wasm::ValType::I64 :
- wasm::ValType::I32;
-
- SmallVector<MVT, 4> ResultMVTs;
- ComputeLegalValueVTs(CurrentFunc, TM, FuncTy->getReturnType(), ResultMVTs);
- // WebAssembly can't currently handle returning tuples.
- if (ResultMVTs.size() <= 1)
- for (MVT ResultMVT : ResultMVTs)
- Returns.push_back(WebAssembly::toValType(ResultMVT));
- else
- Params.push_back(iPTR);
-
- for (Type *Ty : FuncTy->params()) {
- SmallVector<MVT, 4> ParamMVTs;
- ComputeLegalValueVTs(CurrentFunc, TM, Ty, ParamMVTs);
- for (MVT ParamMVT : ParamMVTs)
- Params.push_back(WebAssembly::toValType(ParamMVT));
- }
-
- if (FuncTy->isVarArg())
- Params.push_back(iPTR);
+ SmallVector<MVT, 1> ResultMVTs;
+ SmallVector<MVT, 4> ParamMVTs;
+ ComputeSignatureVTs(FuncTy, CurrentFunc, TM, ParamMVTs, ResultMVTs);
- WasmSym->setReturns(std::move(Returns));
- WasmSym->setParams(std::move(Params));
+ auto Signature = SignatureFromMVTs(ResultMVTs, ParamMVTs);
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
}
@@ -82,10 +75,10 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
cast<MCSymbolWasm>(Printer.GetExternalSymbolSymbol(Name));
const WebAssemblySubtarget &Subtarget = Printer.getSubtarget();
- // __stack_pointer is a global variable; all other external symbols used by
- // CodeGen are functions. It's OK to hardcode knowledge of specific symbols
- // here; this method is precisely there for fetching the signatures of known
- // Clang-provided symbols.
+ // Except for the two exceptions (__stack_pointer and __cpp_exception), all
+ // other external symbols used by CodeGen are functions. It's OK to hardcode
+ // knowledge of specific symbols here; this method is precisely there for
+ // fetching the signatures of known Clang-provided symbols.
if (strcmp(Name, "__stack_pointer") == 0) {
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
WasmSym->setGlobalType(wasm::WasmGlobalType{
@@ -97,27 +90,55 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
SmallVector<wasm::ValType, 4> Returns;
SmallVector<wasm::ValType, 4> Params;
- GetSignature(Subtarget, Name, Returns, Params);
+ if (strcmp(Name, "__cpp_exception") == 0) {
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT);
+ // We can't confirm its signature index for now because there can be
+ // imported exceptions. Set it to be 0 for now.
+ WasmSym->setEventType(
+ {wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION, /* SigIndex */ 0});
+ // We may have multiple C++ compilation units to be linked together, each of
+ // which defines the exception symbol. To resolve them, we declare them as
+ // weak.
+ WasmSym->setWeak(true);
+ WasmSym->setExternal(true);
- WasmSym->setReturns(std::move(Returns));
- WasmSym->setParams(std::move(Params));
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ // All C++ exceptions are assumed to have a single i32 (for wasm32) or i64
+  // (for wasm64) param type and void return type. The reason is, all C++
+ // exception values are pointers, and to share the type section with
+ // functions, exceptions are assumed to have void return type.
+ Params.push_back(Subtarget.hasAddr64() ? wasm::ValType::I64
+ : wasm::ValType::I32);
+ } else { // Function symbols
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ GetLibcallSignature(Subtarget, Name, Returns, Params);
+ }
+ auto Signature =
+ make_unique<wasm::WasmSignature>(std::move(Returns), std::move(Params));
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
return WasmSym;
}
MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
int64_t Offset,
- bool IsFunc) const {
+ bool IsFunc, bool IsGlob,
+ bool IsEvent) const {
MCSymbolRefExpr::VariantKind VK =
IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION
- : MCSymbolRefExpr::VK_None;
+ : IsGlob ? MCSymbolRefExpr::VK_WebAssembly_GLOBAL
+ : IsEvent ? MCSymbolRefExpr::VK_WebAssembly_EVENT
+ : MCSymbolRefExpr::VK_None;
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx);
if (Offset != 0) {
if (IsFunc)
report_fatal_error("Function addresses with offsets not supported");
+ if (IsGlob)
+ report_fatal_error("Global indexes with offsets not supported");
+ if (IsEvent)
+ report_fatal_error("Event indexes with offsets not supported");
Expr =
MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx);
}
@@ -135,6 +156,8 @@ static wasm::ValType getType(const TargetRegisterClass *RC) {
return wasm::ValType::F32;
if (RC == &WebAssembly::F64RegClass)
return wasm::ValType::F64;
+ if (RC == &WebAssembly::V128RegClass)
+ return wasm::ValType::V128;
llvm_unreachable("Unexpected register class");
}
@@ -187,8 +210,10 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
Params.pop_back();
MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
- WasmSym->setReturns(std::move(Returns));
- WasmSym->setParams(std::move(Params));
+ auto Signature = make_unique<wasm::WasmSignature>(std::move(Returns),
+ std::move(Params));
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
const MCExpr *Expr = MCSymbolRefExpr::create(
@@ -212,21 +237,68 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
break;
}
case MachineOperand::MO_GlobalAddress:
- assert(MO.getTargetFlags() == 0 &&
+ assert(MO.getTargetFlags() == WebAssemblyII::MO_NO_FLAG &&
"WebAssembly does not use target flags on GlobalAddresses");
MCOp = LowerSymbolOperand(GetGlobalAddressSymbol(MO), MO.getOffset(),
- MO.getGlobal()->getValueType()->isFunctionTy());
+ MO.getGlobal()->getValueType()->isFunctionTy(),
+ false, false);
break;
case MachineOperand::MO_ExternalSymbol:
// The target flag indicates whether this is a symbol for a
// variable or a function.
- assert((MO.getTargetFlags() & -2) == 0 &&
- "WebAssembly uses only one target flag bit on ExternalSymbols");
- MCOp = LowerSymbolOperand(GetExternalSymbolSymbol(MO), /*Offset=*/0,
- MO.getTargetFlags() & 1);
+ assert((MO.getTargetFlags() & ~WebAssemblyII::MO_SYMBOL_MASK) == 0 &&
+ "WebAssembly uses only symbol flags on ExternalSymbols");
+ MCOp = LowerSymbolOperand(
+ GetExternalSymbolSymbol(MO), /*Offset=*/0,
+ (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_FUNCTION) != 0,
+ (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_GLOBAL) != 0,
+ (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_EVENT) != 0);
+ break;
+ case MachineOperand::MO_MCSymbol:
+ // This is currently used only for LSDA symbols (GCC_except_table),
+ // because global addresses or other external symbols are handled above.
+ assert(MO.getTargetFlags() == 0 &&
+ "WebAssembly does not use target flags on MCSymbol");
+ MCOp = LowerSymbolOperand(MO.getMCSymbol(), /*Offset=*/0, false, false,
+ false);
break;
}
OutMI.addOperand(MCOp);
}
+
+ if (!WasmKeepRegisters)
+ removeRegisterOperands(MI, OutMI);
+}
+
+static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI) {
+ // Remove all uses of stackified registers to bring the instruction format
+  // into its final stack form used throughout MC, and transition opcodes to
+ // their _S variant.
+  // We do this separately from the above code that still may need these
+ // registers for e.g. call_indirect signatures.
+ // See comments in lib/Target/WebAssembly/WebAssemblyInstrFormats.td for
+ // details.
+ // TODO: the code above creates new registers which are then removed here.
+ // That code could be slightly simplified by not doing that, though maybe
+ // it is simpler conceptually to keep the code above in "register mode"
+ // until this transition point.
+ // FIXME: we are not processing inline assembly, which contains register
+ // operands, because it is used by later target generic code.
+ if (MI->isDebugInstr() || MI->isLabel() || MI->isInlineAsm())
+ return;
+
+ // Transform to _S instruction.
+ auto RegOpcode = OutMI.getOpcode();
+ auto StackOpcode = WebAssembly::getStackOpcode(RegOpcode);
+ assert(StackOpcode != -1 && "Failed to stackify instruction");
+ OutMI.setOpcode(StackOpcode);
+
+ // Remove register operands.
+ for (auto I = OutMI.getNumOperands(); I; --I) {
+ auto &MO = OutMI.getOperand(I - 1);
+ if (MO.isReg()) {
+ OutMI.erase(&MO);
+ }
+ }
}
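
A note on the operand-removal loop above: it counts down from getNumOperands() so that erasing an operand never invalidates the indices still to be visited. A stand-alone sketch of the same pattern on a plain std::vector (an illustration, not backend code):

#include <cstddef>
#include <vector>

// Erase all negative entries; walking from the back keeps the remaining
// indices valid while elements are removed, as in removeRegisterOperands.
void eraseNegatives(std::vector<int> &V) {
  for (std::size_t I = V.size(); I; --I)
    if (V[I - 1] < 0)
      V.erase(V.begin() + static_cast<std::ptrdiff_t>(I - 1));
}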
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index 41b4313bb38c..fa7a0ea61b3b 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -33,8 +33,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
- MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset,
- bool IsFunc) const;
+ MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset, bool IsFunc,
+ bool IsGlob, bool IsEvent) const;
public:
WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer)
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index e511e574050f..0157af0f8510 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -43,20 +43,38 @@ void llvm::ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
}
}
-void llvm::ComputeSignatureVTs(const Function &F, const TargetMachine &TM,
+void llvm::ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
+ const TargetMachine &TM,
SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results) {
- ComputeLegalValueVTs(F, TM, F.getReturnType(), Results);
+ ComputeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
+ MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
if (Results.size() > 1) {
// WebAssembly currently can't lower returns of multiple values without
// demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So
// replace multiple return values with a pointer parameter.
Results.clear();
- Params.push_back(
- MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits()));
+ Params.push_back(PtrVT);
}
- for (auto &Arg : F.args())
- ComputeLegalValueVTs(F, TM, Arg.getType(), Params);
+ for (auto *Param : Ty->params())
+ ComputeLegalValueVTs(F, TM, Param, Params);
+ if (Ty->isVarArg())
+ Params.push_back(PtrVT);
+}
+
+void llvm::ValTypesFromMVTs(const ArrayRef<MVT> &In,
+ SmallVectorImpl<wasm::ValType> &Out) {
+ for (MVT Ty : In)
+ Out.push_back(WebAssembly::toValType(Ty));
+}
+
+std::unique_ptr<wasm::WasmSignature>
+llvm::SignatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+ const SmallVectorImpl<MVT> &Params) {
+ auto Sig = make_unique<wasm::WasmSignature>();
+ ValTypesFromMVTs(Results, Sig->Returns);
+ ValTypesFromMVTs(Params, Sig->Params);
+ return Sig;
}
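
The demotion rule implemented by the new ComputeSignatureVTs can be summarized with a small self-contained sketch (plain enums stand in for MVT/ValType, so this illustrates the rule rather than the backend code): more than one result collapses to a leading sret-style pointer parameter, and a varargs signature gains a trailing pointer parameter.

#include <vector>

enum class Ty { I32, I64, F32, F64, Ptr };

struct Sig {
  std::vector<Ty> Params;
  std::vector<Ty> Results;
};

// Hypothetical helper mirroring the shape of ComputeSignatureVTs above.
Sig demoteSignature(const std::vector<Ty> &Results,
                    const std::vector<Ty> &Params, bool IsVarArg, Ty PtrTy) {
  Sig S;
  if (Results.size() > 1)
    S.Params.push_back(PtrTy); // multi-value return demoted to sret pointer
  else
    S.Results = Results;
  S.Params.insert(S.Params.end(), Params.begin(), Params.end());
  if (IsVarArg)
    S.Params.push_back(PtrTy); // varargs are also passed through a pointer
  return S;
}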
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index a60b10fc5309..4be4beb85d04 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -17,7 +17,9 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCSymbolWasm.h"
namespace llvm {
@@ -50,7 +52,7 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
// overaligned values on the user stack.
unsigned BasePtrVreg = -1U;
- public:
+public:
explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
~WebAssemblyFunctionInfo() override;
@@ -60,7 +62,10 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
void addResult(MVT VT) { Results.push_back(VT); }
const std::vector<MVT> &getResults() const { return Results; }
- void clearParamsAndResults() { Params.clear(); Results.clear(); }
+ void clearParamsAndResults() {
+ Params.clear();
+ Results.clear();
+ }
void setNumLocals(size_t NumLocals) { Locals.resize(NumLocals, MVT::i32); }
void setLocal(size_t i, MVT VT) { Locals[i] = VT; }
@@ -115,13 +120,22 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
}
};
-void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
- Type *Ty, SmallVectorImpl<MVT> &ValueVTs);
+void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
+ SmallVectorImpl<MVT> &ValueVTs);
-void ComputeSignatureVTs(const Function &F, const TargetMachine &TM,
- SmallVectorImpl<MVT> &Params,
+// Compute the signature for a given FunctionType (Ty). Note that it's not the
+// signature for F (F is just used to get various context).
+void ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
+ const TargetMachine &TM, SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results);
+void ValTypesFromMVTs(const ArrayRef<MVT> &In,
+ SmallVectorImpl<wasm::ValType> &Out);
+
+std::unique_ptr<wasm::WasmSignature>
+SignatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+ const SmallVectorImpl<MVT> &Params);
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index 893e8484c4c6..c4b5e96db0c7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -1,4 +1,4 @@
-//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===//
+//== WebAssemblyMemIntrinsicResults.cpp - Optimize memory intrinsic results ==//
//
// The LLVM Compiler Infrastructure
//
@@ -8,19 +8,22 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements an optimization pass using store result values.
+/// This file implements an optimization pass using memory intrinsic results.
///
-/// WebAssembly's store instructions return the stored value. This is to enable
-/// an optimization wherein uses of the stored value can be replaced by uses of
-/// the store's result value, making the stored value register more likely to
-/// be single-use, thus more likely to be useful to register stackifying, and
-/// potentially also exposing the store to register stackifying. These both can
-/// reduce get_local/set_local traffic.
+/// Calls to memory intrinsics (memcpy, memmove, memset) return the destination
+/// address. They are in the form of
+/// %dst_new = call @memcpy %dst, %src, %len
+/// where %dst and %dst_new registers contain the same value.
///
-/// This pass also performs this optimization for memcpy, memmove, and memset
-/// calls, since the LLVM intrinsics for these return void so they can't use the
-/// returned attribute and consequently aren't handled by the OptimizeReturned
-/// pass.
+/// This is to enable an optimization wherein uses of the %dst register used in
+/// the parameter can be replaced by uses of the %dst_new register used in the
+/// result, making the %dst register more likely to be single-use, thus more
+/// likely to be useful to register stackifying, and potentially also exposing
+/// the call instruction itself to register stackifying. These both can reduce
+/// local.get/local.set traffic.
+///
+/// The LLVM intrinsics for these return void so they can't use the returned
+/// attribute and consequently aren't handled by the OptimizeReturned pass.
///
//===----------------------------------------------------------------------===//
@@ -38,15 +41,17 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define DEBUG_TYPE "wasm-store-results"
+#define DEBUG_TYPE "wasm-mem-intrinsic-results"
namespace {
-class WebAssemblyStoreResults final : public MachineFunctionPass {
+class WebAssemblyMemIntrinsicResults final : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- WebAssemblyStoreResults() : MachineFunctionPass(ID) {}
+ WebAssemblyMemIntrinsicResults() : MachineFunctionPass(ID) {}
- StringRef getPassName() const override { return "WebAssembly Store Results"; }
+ StringRef getPassName() const override {
+ return "WebAssembly Memory Intrinsic Results";
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -67,12 +72,13 @@ private:
};
} // end anonymous namespace
-char WebAssemblyStoreResults::ID = 0;
-INITIALIZE_PASS(WebAssemblyStoreResults, DEBUG_TYPE,
- "Optimize store result values for WebAssembly", false, false)
+char WebAssemblyMemIntrinsicResults::ID = 0;
+INITIALIZE_PASS(WebAssemblyMemIntrinsicResults, DEBUG_TYPE,
+ "Optimize memory intrinsic result values for WebAssembly",
+ false, false)
-FunctionPass *llvm::createWebAssemblyStoreResults() {
- return new WebAssemblyStoreResults();
+FunctionPass *llvm::createWebAssemblyMemIntrinsicResults() {
+ return new WebAssemblyMemIntrinsicResults();
}
// Replace uses of FromReg with ToReg if they are dominated by MI.
@@ -91,7 +97,8 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
SmallVector<SlotIndex, 4> Indices;
- for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end(); I != E;) {
+ for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end();
+ I != E;) {
MachineOperand &O = *I++;
MachineInstr *Where = O.getParent();
@@ -132,9 +139,9 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
// If we replaced all dominated uses, FromReg is now killed at MI.
if (!FromLI->liveAt(FromIdx.getDeadSlot()))
- MI.addRegisterKilled(FromReg,
- MBB.getParent()->getSubtarget<WebAssemblySubtarget>()
- .getRegisterInfo());
+ MI.addRegisterKilled(FromReg, MBB.getParent()
+ ->getSubtarget<WebAssemblySubtarget>()
+ .getRegisterInfo());
}
return Changed;
@@ -142,8 +149,7 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
const MachineRegisterInfo &MRI,
- MachineDominatorTree &MDT,
- LiveIntervals &LIS,
+ MachineDominatorTree &MDT, LiveIntervals &LIS,
const WebAssemblyTargetLowering &TLI,
const TargetLibraryInfo &LibInfo) {
MachineOperand &Op1 = MI.getOperand(1);
@@ -164,14 +170,14 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
unsigned FromReg = MI.getOperand(2).getReg();
unsigned ToReg = MI.getOperand(0).getReg();
if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
- report_fatal_error("Store results: call to builtin function with wrong "
- "signature, from/to mismatch");
+ report_fatal_error("Memory Intrinsic results: call to builtin function "
+ "with wrong signature, from/to mismatch");
return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
}
-bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
+bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG({
- dbgs() << "********** Store Results **********\n"
+ dbgs() << "********** Memory Intrinsic Results **********\n"
<< "********** Function: " << MF.getName() << '\n';
});
@@ -186,7 +192,8 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
// We don't preserve SSA form.
MRI.leaveSSA();
- assert(MRI.tracksLiveness() && "StoreResults expects liveness tracking");
+ assert(MRI.tracksLiveness() &&
+ "MemIntrinsicResults expects liveness tracking");
for (auto &MBB : MF) {
LLVM_DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n');
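
At the source level, the rewrite this pass performs is equivalent to using the destination pointer returned by the memory intrinsic instead of the original one, which is what makes the original register single-use. A minimal illustration in ordinary C++ (not backend code):

#include <cstddef>
#include <cstring>

void before(char *Dst, const char *Src, std::size_t N) {
  std::memcpy(Dst, Src, N);
  Dst[0] = 'x'; // second use of the original %dst register
}

void after(char *Dst, const char *Src, std::size_t N) {
  char *DstNew = static_cast<char *>(std::memcpy(Dst, Src, N));
  DstNew[0] = 'x'; // %dst is now single-use, so it is easier to stackify
}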
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 04ac22a589ea..3d0a15244ee0 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -65,7 +65,8 @@ FunctionPass *llvm::createWebAssemblyOptimizeLiveIntervals() {
return new WebAssemblyOptimizeLiveIntervals();
}
-bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
+ MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Optimize LiveIntervals **********\n"
"********** Function: "
<< MF.getName() << '\n');
@@ -76,11 +77,10 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF)
// We don't preserve SSA form.
MRI.leaveSSA();
- assert(MRI.tracksLiveness() &&
- "OptimizeLiveIntervals expects liveness");
+ assert(MRI.tracksLiveness() && "OptimizeLiveIntervals expects liveness");
// Split multiple-VN LiveIntervals into multiple LiveIntervals.
- SmallVector<LiveInterval*, 4> SplitLIs;
+ SmallVector<LiveInterval *, 4> SplitLIs;
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI.reg_nodbg_empty(Reg))
@@ -94,7 +94,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF)
// instructions to satisfy LiveIntervals' requirement that all uses be
// dominated by defs. Now that LiveIntervals has computed which of these
// defs are actually needed and which are dead, remove the dead ones.
- for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE; ) {
+ for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE;) {
MachineInstr *MI = &*MII++;
if (MI->isImplicitDef() && MI->getOperand(0).isDead()) {
LiveInterval &LI = LIS.getInterval(MI->getOperand(0).getReg());
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 113ee2532bce..2c018d0785a7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -74,6 +74,10 @@ void OptimizeReturned::visitCallSite(CallSite CS) {
}
bool OptimizeReturned::runOnFunction(Function &F) {
+ LLVM_DEBUG(dbgs() << "********** Optimize returned Attributes **********\n"
+ "********** Function: "
+ << F.getName() << '\n');
+
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
visit(F);
return true;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index a54484407805..2dfd85953f14 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -192,11 +192,21 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32,
WebAssembly::COPY_V128);
break;
+ case WebAssembly::RETURN_v2i64:
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2i64,
+ WebAssembly::COPY_V128);
+ break;
case WebAssembly::RETURN_v4f32:
Changed |= MaybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32,
WebAssembly::COPY_V128);
break;
+ case WebAssembly::RETURN_v2f64:
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2f64,
+ WebAssembly::COPY_V128);
+ break;
case WebAssembly::RETURN_VOID:
Changed |= MaybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID,
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index e44e7057e233..0be0ba657830 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -70,7 +70,8 @@ static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
return false;
}
-bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
+ MachineFunction &MF) {
LLVM_DEBUG({
dbgs() << "********** Prepare For LiveIntervals **********\n"
<< "********** Function: " << MF.getName() << '\n';
@@ -112,7 +113,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &M
// Move ARGUMENT_* instructions to the top of the entry block, so that their
// liveness reflects the fact that these really are live-in values.
- for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE; ) {
+ for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE;) {
MachineInstr &MI = *MII++;
if (WebAssembly::isArgument(MI)) {
MI.removeFromParent();
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index d69a27937105..d97b13a8d699 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -118,16 +118,15 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
// registers), by weight next, and then by position.
// TODO: Investigate more intelligent sorting heuristics. For starters, we
// should try to coalesce adjacent live intervals before non-adjacent ones.
- llvm::sort(SortedIntervals.begin(), SortedIntervals.end(),
- [MRI](LiveInterval *LHS, LiveInterval *RHS) {
- if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
- return MRI->isLiveIn(LHS->reg);
- if (LHS->weight != RHS->weight)
- return LHS->weight > RHS->weight;
- if (LHS->empty() || RHS->empty())
- return !LHS->empty() && RHS->empty();
- return *LHS < *RHS;
- });
+ llvm::sort(SortedIntervals, [MRI](LiveInterval *LHS, LiveInterval *RHS) {
+ if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
+ return MRI->isLiveIn(LHS->reg);
+ if (LHS->weight != RHS->weight)
+ return LHS->weight > RHS->weight;
+ if (LHS->empty() || RHS->empty())
+ return !LHS->empty() && RHS->empty();
+ return *LHS < *RHS;
+ });
LLVM_DEBUG(dbgs() << "Coloring register intervals:\n");
SmallVector<unsigned, 16> SlotMapping(SortedIntervals.size(), -1u);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 9f5d5bd87831..1eb32ed64494 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -22,9 +22,11 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
#include "WebAssembly.h"
+#include "WebAssemblyDebugValueManager.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -97,11 +99,11 @@ static void ImposeStackOrdering(MachineInstr *MI) {
static void ConvertImplicitDefToConstZero(MachineInstr *MI,
MachineRegisterInfo &MRI,
const TargetInstrInfo *TII,
- MachineFunction &MF) {
+ MachineFunction &MF,
+ LiveIntervals &LIS) {
assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);
- const auto *RegClass =
- MRI.getRegClass(MI->getOperand(0).getReg());
+ const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg());
if (RegClass == &WebAssembly::I32RegClass) {
MI->setDesc(TII->get(WebAssembly::CONST_I32));
MI->addOperand(MachineOperand::CreateImm(0));
@@ -118,6 +120,14 @@ static void ConvertImplicitDefToConstZero(MachineInstr *MI,
ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
Type::getDoubleTy(MF.getFunction().getContext())));
MI->addOperand(MachineOperand::CreateFPImm(Val));
+ } else if (RegClass == &WebAssembly::V128RegClass) {
+ unsigned TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32));
+ MI->addOperand(MachineOperand::CreateReg(TempReg, false));
+ MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(WebAssembly::CONST_I32), TempReg)
+ .addImm(0);
+ LIS.InsertMachineInstrInMaps(*Const);
} else {
llvm_unreachable("Unexpected reg class");
}
@@ -172,29 +182,24 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
// Check for stores.
if (MI.mayStore()) {
Write = true;
-
- // Check for stores to __stack_pointer.
- for (auto MMO : MI.memoperands()) {
- const MachinePointerInfo &MPI = MMO->getPointerInfo();
- if (MPI.V.is<const PseudoSourceValue *>()) {
- auto PSV = MPI.V.get<const PseudoSourceValue *>();
- if (const ExternalSymbolPseudoSourceValue *EPSV =
- dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
- if (StringRef(EPSV->getSymbol()) == "__stack_pointer") {
- StackPointer = true;
- }
- }
- }
} else if (MI.hasOrderedMemoryRef()) {
switch (MI.getOpcode()) {
- case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
- case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
- case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
- case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
- case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
- case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
- case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
- case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+ case WebAssembly::DIV_S_I32:
+ case WebAssembly::DIV_S_I64:
+ case WebAssembly::REM_S_I32:
+ case WebAssembly::REM_S_I64:
+ case WebAssembly::DIV_U_I32:
+ case WebAssembly::DIV_U_I64:
+ case WebAssembly::REM_U_I32:
+ case WebAssembly::REM_U_I64:
+ case WebAssembly::I32_TRUNC_S_F32:
+ case WebAssembly::I64_TRUNC_S_F32:
+ case WebAssembly::I32_TRUNC_S_F64:
+ case WebAssembly::I64_TRUNC_S_F64:
+ case WebAssembly::I32_TRUNC_U_F32:
+ case WebAssembly::I64_TRUNC_U_F32:
+ case WebAssembly::I32_TRUNC_U_F64:
+ case WebAssembly::I64_TRUNC_U_F64:
      // These instructions have hasUnmodeledSideEffects() returning true
// because they trap on overflow and invalid so they can't be arbitrarily
// moved, however hasOrderedMemoryRef() interprets this plus their lack
@@ -214,14 +219,22 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
// Check for side effects.
if (MI.hasUnmodeledSideEffects()) {
switch (MI.getOpcode()) {
- case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
- case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
- case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
- case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
- case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
- case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
- case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
- case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+ case WebAssembly::DIV_S_I32:
+ case WebAssembly::DIV_S_I64:
+ case WebAssembly::REM_S_I32:
+ case WebAssembly::REM_S_I64:
+ case WebAssembly::DIV_U_I32:
+ case WebAssembly::DIV_U_I64:
+ case WebAssembly::REM_U_I32:
+ case WebAssembly::REM_U_I64:
+ case WebAssembly::I32_TRUNC_S_F32:
+ case WebAssembly::I64_TRUNC_S_F32:
+ case WebAssembly::I32_TRUNC_S_F64:
+ case WebAssembly::I64_TRUNC_S_F64:
+ case WebAssembly::I32_TRUNC_U_F32:
+ case WebAssembly::I64_TRUNC_U_F32:
+ case WebAssembly::I32_TRUNC_U_F64:
+ case WebAssembly::I64_TRUNC_U_F64:
// These instructions have hasUnmodeledSideEffects() returning true
// because they trap on overflow and invalid so they can't be arbitrarily
// moved, however in the specific case of register stackifying, it is safe
@@ -233,22 +246,15 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
}
}
+ // Check for writes to __stack_pointer global.
+ if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 &&
+ strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
+ StackPointer = true;
+
// Analyze calls.
if (MI.isCall()) {
- switch (MI.getOpcode()) {
- case WebAssembly::CALL_VOID:
- case WebAssembly::CALL_INDIRECT_VOID:
- QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
- break;
- case WebAssembly::CALL_I32: case WebAssembly::CALL_I64:
- case WebAssembly::CALL_F32: case WebAssembly::CALL_F64:
- case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64:
- case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64:
- QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
- break;
- default:
- llvm_unreachable("unexpected call opcode");
- }
+ unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI);
+ QueryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
}
}
@@ -263,8 +269,7 @@ static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
// LiveIntervals to handle complex cases.
static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
const MachineRegisterInfo &MRI,
- const LiveIntervals &LIS)
-{
+ const LiveIntervals &LIS) {
// Most registers are in SSA form here so we try a quick MRI query first.
if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
return Def;
@@ -280,17 +285,16 @@ static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
// Test whether Reg, as defined at Def, has exactly one use. This is a
// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
// to handle complex cases.
-static bool HasOneUse(unsigned Reg, MachineInstr *Def,
- MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
- LiveIntervals &LIS) {
+static bool HasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
+ MachineDominatorTree &MDT, LiveIntervals &LIS) {
// Most registers are in SSA form here so we try a quick MRI query first.
if (MRI.hasOneUse(Reg))
return true;
bool HasOne = false;
const LiveInterval &LI = LIS.getInterval(Reg);
- const VNInfo *DefVNI = LI.getVNInfoAt(
- LIS.getInstructionIndex(*Def).getRegSlot());
+ const VNInfo *DefVNI =
+ LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot());
assert(DefVNI);
for (auto &I : MRI.use_nodbg_operands(Reg)) {
const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
@@ -403,7 +407,6 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
if (UseVNI != OneUseVNI)
continue;
- const MachineInstr *OneUseInst = OneUse.getParent();
if (UseInst == OneUseInst) {
// Another use in the same instruction. We need to ensure that the one
// selected use happens "before" it.
@@ -415,8 +418,8 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
// Actually, dominating is over-conservative. Test that the use would
// happen after the one selected use in the stack evaluation order.
//
- // This is needed as a consequence of using implicit get_locals for
- // uses and implicit set_locals for defs.
+ // This is needed as a consequence of using implicit local.gets for
+ // uses and implicit local.sets for defs.
if (UseInst->getDesc().getNumDefs() == 0)
return false;
const MachineOperand &MO = UseInst->getOperand(0);
@@ -426,8 +429,8 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
!MFI.isVRegStackified(DefReg))
return false;
- assert(MRI.hasOneUse(DefReg));
- const MachineOperand &NewUse = *MRI.use_begin(DefReg);
+ assert(MRI.hasOneNonDBGUse(DefReg));
+ const MachineOperand &NewUse = *MRI.use_nodbg_begin(DefReg);
const MachineInstr *NewUseInst = NewUse.getParent();
if (NewUseInst == OneUseInst) {
if (&OneUse > &NewUse)
@@ -459,22 +462,23 @@ static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
// Shrink LI to its uses, cleaning up LI.
static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
if (LIS.shrinkToUses(&LI)) {
- SmallVector<LiveInterval*, 4> SplitLIs;
+ SmallVector<LiveInterval *, 4> SplitLIs;
LIS.splitSeparateComponents(LI, SplitLIs);
}
}
/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
-static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
- MachineInstr *Def,
- MachineBasicBlock &MBB,
+static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
+ MachineInstr *Def, MachineBasicBlock &MBB,
MachineInstr *Insert, LiveIntervals &LIS,
WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI) {
LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());
+ WebAssemblyDebugValueManager DefDIs(Def);
MBB.splice(Insert, &MBB, Def);
+ DefDIs.move(Insert);
LIS.handleMove(*Def);
if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
@@ -499,6 +503,8 @@ static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
MFI.stackifyVReg(NewReg);
+ DefDIs.updateReg(NewReg);
+
LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
}
@@ -516,6 +522,8 @@ static MachineInstr *RematerializeCheapDef(
LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
+ WebAssemblyDebugValueManager DefDIs(&Def);
+
unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
Op.setReg(NewReg);
@@ -536,6 +544,7 @@ static MachineInstr *RematerializeCheapDef(
}
// If that was the last use of the original, delete the original.
+ // Move or clone corresponding DBG_VALUEs to the 'Insert' location.
if (IsDead) {
LLVM_DEBUG(dbgs() << " - Deleting original\n");
SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
@@ -543,6 +552,11 @@ static MachineInstr *RematerializeCheapDef(
LIS.removeInterval(Reg);
LIS.RemoveMachineInstrFromMaps(Def);
Def.eraseFromParent();
+
+ DefDIs.move(&*Insert);
+ DefDIs.updateReg(NewReg);
+ } else {
+ DefDIs.clone(&*Insert, NewReg);
}
return Clone;
@@ -566,7 +580,7 @@ static MachineInstr *RematerializeCheapDef(
/// INST ..., Reg, ...
/// INST ..., Reg, ...
///
-/// with DefReg and TeeReg stackified. This eliminates a get_local from the
+/// with DefReg and TeeReg stackified. This eliminates a local.get from the
/// resulting code.
static MachineInstr *MoveAndTeeForMultiUse(
unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
@@ -574,6 +588,8 @@ static MachineInstr *MoveAndTeeForMultiUse(
MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
+ WebAssemblyDebugValueManager DefDIs(Def);
+
// Move Def into place.
MBB.splice(Insert, &MBB, Def);
LIS.handleMove(*Def);
@@ -592,6 +608,8 @@ static MachineInstr *MoveAndTeeForMultiUse(
SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
+ DefDIs.move(Insert);
+
// Tell LiveIntervals we moved the original vreg def from Def to Tee.
LiveInterval &LI = LIS.getInterval(Reg);
LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
@@ -608,6 +626,9 @@ static MachineInstr *MoveAndTeeForMultiUse(
ImposeStackOrdering(Def);
ImposeStackOrdering(Tee);
+ DefDIs.clone(Tee, DefReg);
+ DefDIs.clone(Insert, TeeReg);
+
LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
return Def;
@@ -672,8 +693,8 @@ public:
/// operand in the tree that we haven't visited yet. Moving a definition of
/// Reg to a point in the tree after that would change its value.
///
- /// This is needed as a consequence of using implicit get_locals for
- /// uses and implicit set_locals for defs.
+ /// This is needed as a consequence of using implicit local.gets for
+ /// uses and implicit local.sets for defs.
bool IsOnStack(unsigned Reg) const {
for (const RangeTy &Range : Worklist)
for (const MachineOperand &MO : Range)
@@ -687,9 +708,9 @@ public:
/// tried for the current instruction and didn't work.
class CommutingState {
/// There are effectively three states: the initial state where we haven't
- /// started commuting anything and we don't know anything yet, the tenative
+ /// started commuting anything and we don't know anything yet, the tentative
/// state where we've commuted the operands of the current instruction and are
- /// revisting it, and the declined state where we've reverted the operands
+ /// revisiting it, and the declined state where we've reverted the operands
/// back to their original order and will no longer commute it further.
bool TentativelyCommuting;
bool Declined;
@@ -831,7 +852,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// to a constant 0 so that the def is explicit, and the push/pop
// correspondence is maintained.
if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
- ConvertImplicitDefToConstZero(Insert, MRI, TII, MF);
+ ConvertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);
// We stackified an operand. Add the defining instruction's operands to
// the worklist stack now to continue to build an ever deeper tree.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index b6481ac2d4ae..1f0870865b06 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 29f42b96b249..a7c3d177724d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -63,6 +63,6 @@ def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32, I32_0)>;
def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64, I64_0)>;
def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
-def V128 : WebAssemblyRegClass<[v4f32, v4i32, v16i8, v8i16], 128, (add V128_0)>;
+def V128 : WebAssemblyRegClass<[v4f32, v2f64, v2i64, v4i32, v16i8, v8i16], 128,
+ (add V128_0)>;
def EXCEPT_REF : WebAssemblyRegClass<[ExceptRef], 0, (add EXCEPT_REF_0)>;
-
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index f432b367d156..e5a3e47a3bcd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -54,8 +54,8 @@ private:
char WebAssemblyReplacePhysRegs::ID = 0;
INITIALIZE_PASS(WebAssemblyReplacePhysRegs, DEBUG_TYPE,
- "Replace physical registers with virtual registers",
- false, false)
+ "Replace physical registers with virtual registers", false,
+ false)
FunctionPass *llvm::createWebAssemblyReplacePhysRegs() {
return new WebAssemblyReplacePhysRegs();
@@ -86,7 +86,7 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
// Replace explicit uses of the physical register with a virtual register.
const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg);
unsigned VReg = WebAssembly::NoRegister;
- for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E; ) {
+ for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E;) {
MachineOperand &MO = *I++;
if (!MO.isImplicit()) {
if (VReg == WebAssembly::NoRegister)
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index fe8a5e4c06f1..6cf81a9d77b3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -88,7 +88,6 @@ enum RuntimeLibcallSignature {
unsupported
};
-
struct RuntimeLibcallSignatureTable {
std::vector<RuntimeLibcallSignature> Table;
@@ -486,18 +485,17 @@ struct StaticLibcallNameMap {
} // end anonymous namespace
-
-
-void llvm::GetSignature(const WebAssemblySubtarget &Subtarget,
- RTLIB::Libcall LC, SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params) {
+void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ RTLIB::Libcall LC,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params) {
assert(Rets.empty());
assert(Params.empty());
wasm::ValType iPTR =
Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32;
- auto& Table = RuntimeLibcallSignatures->Table;
+ auto &Table = RuntimeLibcallSignatures->Table;
switch (Table[LC]) {
case func:
break;
@@ -834,11 +832,12 @@ void llvm::GetSignature(const WebAssemblySubtarget &Subtarget,
static ManagedStatic<StaticLibcallNameMap> LibcallNameMap;
// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unused
// other than here, just roll its logic into this version.
-void llvm::GetSignature(const WebAssemblySubtarget &Subtarget, const char *Name,
- SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params) {
- auto& Map = LibcallNameMap->Map;
+void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ const char *Name,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params) {
+ auto &Map = LibcallNameMap->Map;
auto val = Map.find(Name);
assert(val != Map.end() && "unexpected runtime library name");
- return GetSignature(Subtarget, val->second, Rets, Params);
+ return GetLibcallSignature(Subtarget, val->second, Rets, Params);
}
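
The name-based overload keeps the same lookup-then-delegate shape after the rename: resolve the external symbol name to an RTLIB enum via the static map, then reuse the enum-based overload. A stripped-down sketch of that shape (standard containers instead of the LLVM types; names are illustrative):

#include <cassert>
#include <map>
#include <string>

enum Libcall { LC_MEMCPY, LC_MEMSET };

void getSignature(Libcall LC) { /* fill Rets/Params for this libcall */ }

void getSignature(const std::string &Name) {
  static const std::map<std::string, Libcall> Names = {
      {"memcpy", LC_MEMCPY}, {"memset", LC_MEMSET}};
  auto It = Names.find(Name);
  assert(It != Names.end() && "unexpected runtime library name");
  getSignature(It->second); // delegate to the enum-based flavor
}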
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
index 2ba65ff5b716..7fa70bea96de 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
@@ -23,14 +23,15 @@ namespace llvm {
class WebAssemblySubtarget;
-extern void GetSignature(const WebAssemblySubtarget &Subtarget,
- RTLIB::Libcall LC,
- SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params);
-
-extern void GetSignature(const WebAssemblySubtarget &Subtarget,
- const char *Name, SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params);
+extern void GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ RTLIB::Libcall LC,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params);
+
+extern void GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ const char *Name,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params);
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
index 14221993603a..c95af88c6f43 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@@ -60,8 +60,7 @@ static void RewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
assert(MI.hasOneMemOperand() &&
"Load and store instructions have exactly one mem operand");
assert((*MI.memoperands_begin())->getSize() ==
- (UINT64_C(1)
- << WebAssembly::GetDefaultP2Align(MI.getOpcode())) &&
+ (UINT64_C(1) << WebAssembly::GetDefaultP2Align(MI.getOpcode())) &&
"Default p2align value should be natural");
assert(MI.getDesc().OpInfo[OperandNo].OperandType ==
WebAssembly::OPERAND_P2ALIGN &&
@@ -69,8 +68,8 @@ static void RewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment());
// WebAssembly does not currently support supernatural alignment.
- P2Align = std::min(
- P2Align, uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode())));
+ P2Align = std::min(P2Align,
+ uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode())));
MI.getOperand(OperandNo).setImm(P2Align);
}
@@ -90,6 +89,12 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::LOAD_I64:
case WebAssembly::LOAD_F32:
case WebAssembly::LOAD_F64:
+ case WebAssembly::LOAD_v16i8:
+ case WebAssembly::LOAD_v8i16:
+ case WebAssembly::LOAD_v4i32:
+ case WebAssembly::LOAD_v2i64:
+ case WebAssembly::LOAD_v4f32:
+ case WebAssembly::LOAD_v2f64:
case WebAssembly::LOAD8_S_I32:
case WebAssembly::LOAD8_U_I32:
case WebAssembly::LOAD16_S_I32:
@@ -119,6 +124,8 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
@@ -131,6 +138,8 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW_ADD_I32:
case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
case WebAssembly::ATOMIC_RMW_SUB_I32:
@@ -143,18 +152,30 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I32:
case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW_ADD_I64:
case WebAssembly::ATOMIC_RMW_SUB_I64:
case WebAssembly::ATOMIC_RMW_AND_I64:
case WebAssembly::ATOMIC_RMW_OR_I64:
case WebAssembly::ATOMIC_RMW_XOR_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_NOTIFY:
+ case WebAssembly::ATOMIC_WAIT_I32:
+ case WebAssembly::ATOMIC_WAIT_I64:
RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
break;
case WebAssembly::STORE_I32:
case WebAssembly::STORE_I64:
case WebAssembly::STORE_F32:
case WebAssembly::STORE_F64:
+ case WebAssembly::STORE_v16i8:
+ case WebAssembly::STORE_v8i16:
+ case WebAssembly::STORE_v4i32:
+ case WebAssembly::STORE_v2i64:
+ case WebAssembly::STORE_v4f32:
+ case WebAssembly::STORE_v2f64:
case WebAssembly::STORE8_I32:
case WebAssembly::STORE16_I32:
case WebAssembly::STORE8_I64:
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index d6af0fb219d7..98133e2153a0 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -40,10 +40,9 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
const std::string &CPU,
const std::string &FS,
const TargetMachine &TM)
- : WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false),
- HasAtomics(false), HasNontrappingFPToInt(false), HasSignExt(false),
- HasExceptionHandling(false), CPUString(CPU), TargetTriple(TT),
- FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
+ : WebAssemblyGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
+ TargetTriple(TT), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
TLInfo(TM, *this) {}
bool WebAssemblySubtarget::enableMachineScheduler() const {
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index b170dbff3b32..0a0c04609ac4 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -29,11 +29,16 @@
namespace llvm {
class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
- bool HasSIMD128;
- bool HasAtomics;
- bool HasNontrappingFPToInt;
- bool HasSignExt;
- bool HasExceptionHandling;
+ enum SIMDEnum {
+ NoSIMD,
+ SIMD128,
+ UnimplementedSIMD128,
+ } SIMDLevel = NoSIMD;
+
+ bool HasAtomics = false;
+ bool HasNontrappingFPToInt = false;
+ bool HasSignExt = false;
+ bool HasExceptionHandling = false;
/// String name of used CPU.
std::string CPUString;
@@ -77,7 +82,10 @@ public:
// Predicates used by WebAssemblyInstrInfo.td.
bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
- bool hasSIMD128() const { return HasSIMD128; }
+ bool hasSIMD128() const { return SIMDLevel >= SIMD128; }
+ bool hasUnimplementedSIMD128() const {
+ return SIMDLevel >= UnimplementedSIMD128;
+ }
bool hasAtomics() const { return HasAtomics; }
bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; }
bool hasSignExt() const { return HasSignExt; }
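For reference, the ordered SIMDLevel values above make hasUnimplementedSIMD128() imply hasSIMD128(), so enabling the experimental SIMD subset automatically enables the base SIMD128 checks. A minimal standalone sketch of the same pattern (SIMDLevelT and FeatureQuery are illustrative names, not part of this change):

#include <cassert>

// Ordered feature levels: a higher level implies every lower one.
enum SIMDLevelT { NoSIMD, SIMD128, UnimplementedSIMD128 };

struct FeatureQuery {
  SIMDLevelT SIMDLevel = NoSIMD;
  bool hasSIMD128() const { return SIMDLevel >= SIMD128; }
  bool hasUnimplementedSIMD128() const {
    return SIMDLevel >= UnimplementedSIMD128;
  }
};

int main() {
  FeatureQuery Q;
  Q.SIMDLevel = UnimplementedSIMD128;
  // Selecting the experimental subset also satisfies the base SIMD128 check.
  assert(Q.hasSIMD128() && Q.hasUnimplementedSIMD128());
  return 0;
}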
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 7c10f022cbbc..3bf8dd40892c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -58,10 +58,11 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
initializeOptimizeReturnedPass(PR);
initializeWebAssemblyArgumentMovePass(PR);
initializeWebAssemblySetP2AlignOperandsPass(PR);
+ initializeWebAssemblyEHRestoreStackPointerPass(PR);
initializeWebAssemblyReplacePhysRegsPass(PR);
initializeWebAssemblyPrepareForLiveIntervalsPass(PR);
initializeWebAssemblyOptimizeLiveIntervalsPass(PR);
- initializeWebAssemblyStoreResultsPass(PR);
+ initializeWebAssemblyMemIntrinsicResultsPass(PR);
initializeWebAssemblyRegStackifyPass(PR);
initializeWebAssemblyRegColoringPass(PR);
initializeWebAssemblyExplicitLocalsPass(PR);
@@ -81,8 +82,12 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
//===----------------------------------------------------------------------===//
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::PIC_;
+ if (!RM.hasValue()) {
+ // Default to the static relocation model. This should always be more optimal
+ // than PIC since the static linker can determine all global addresses and
+ // assume direct function calls.
+ return Reloc::Static;
+ }
return *RM;
}
@@ -96,7 +101,7 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
: "e-m:e-p:32:32-i64:64-n32:64-S128",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
- CM ? *CM : CodeModel::Large, OL),
+ getEffectiveCodeModel(CM, CodeModel::Large), OL),
TLOF(new WebAssemblyTargetObjectFile()) {
// WebAssembly type-checks instructions, but a noreturn function with a return
// type that doesn't match the context will cause a check failure. So we lower
@@ -149,7 +154,7 @@ class StripThreadLocal final : public ModulePass {
// pass just converts all GlobalVariables to NotThreadLocal
static char ID;
- public:
+public:
StripThreadLocal() : ModulePass(ID) {}
bool runOnModule(Module &M) override {
for (auto &GV : M.globals())
@@ -280,6 +285,9 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
void WebAssemblyPassConfig::addPreEmitPass() {
TargetPassConfig::addPreEmitPass();
+ // Restore __stack_pointer global after an exception is thrown.
+ addPass(createWebAssemblyEHRestoreStackPointer());
+
// Now that we have a prologue and epilogue and all frame indices are
// rewritten, eliminate SP and FP. This allows them to be stackified,
// colored, and numbered with the rest of the registers.
@@ -290,6 +298,12 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// order of the arguments.
addPass(createWebAssemblyCallIndirectFixup());
+ // Eliminate multiple-entry loops.
+ addPass(createWebAssemblyFixIrreducibleControlFlow());
+
+ // Do various transformations for exception handling.
+ addPass(createWebAssemblyLateEHPrepare());
+
if (getOptLevel() != CodeGenOpt::None) {
// LiveIntervals isn't commonly run this late. Re-establish preconditions.
addPass(createWebAssemblyPrepareForLiveIntervals());
@@ -297,13 +311,14 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// Depend on LiveIntervals and perform some optimizations on it.
addPass(createWebAssemblyOptimizeLiveIntervals());
- // Prepare store instructions for register stackifying.
- addPass(createWebAssemblyStoreResults());
+ // Prepare memory intrinsic calls for register stackifying.
+ addPass(createWebAssemblyMemIntrinsicResults());
// Mark registers as representing wasm's value stack. This is a key
// code-compression technique in WebAssembly. We run this pass (and
- // StoreResults above) very late, so that it sees as much code as possible,
- // including code emitted by PEI and expanded by late tail duplication.
+ // MemIntrinsicResults above) very late, so that it sees as much code as
+ // possible, including code emitted by PEI and expanded by late tail
+ // duplication.
addPass(createWebAssemblyRegStackify());
// Run the register coloring pass to reduce the total number of registers.
@@ -312,17 +327,9 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyRegColoring());
}
- // Eliminate multiple-entry loops. Do this before inserting explicit get_local
- // and set_local operators because we create a new variable that we want
- // converted into a local.
- addPass(createWebAssemblyFixIrreducibleControlFlow());
-
- // Insert explicit get_local and set_local operators.
+ // Insert explicit local.get and local.set operators.
addPass(createWebAssemblyExplicitLocals());
- // Do various transformations for exception handling
- addPass(createWebAssemblyLateEHPrepare());
-
// Sort the blocks of the CFG into topological order, a prerequisite for
// BLOCK and LOOP markers.
addPass(createWebAssemblyCFGSort());
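The new default in getEffectiveRelocModel() only takes effect when no relocation model was requested; an explicit request is passed through unchanged. A small standalone sketch of that fallback, using std::optional as a stand-in for llvm::Optional and an illustrative RelocModel enum:

#include <cassert>
#include <optional>

enum class RelocModel { Static, Pic };

// Unset -> Static, so the static linker can resolve global addresses and
// assume direct calls; an explicit choice is honored as-is.
static RelocModel getEffectiveRelocModel(std::optional<RelocModel> RM) {
  if (!RM)
    return RelocModel::Static;
  return *RM;
}

int main() {
  assert(getEffectiveRelocModel(std::nullopt) == RelocModel::Static);
  assert(getEffectiveRelocModel(RelocModel::Pic) == RelocModel::Pic);
  return 0;
}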
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index 5944cea5abd1..ada6fb9a96d7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -27,14 +27,26 @@ const char *const WebAssembly::PersonalityWrapperFn =
bool WebAssembly::isArgument(const MachineInstr &MI) {
switch (MI.getOpcode()) {
- case WebAssembly::ARGUMENT_I32:
- case WebAssembly::ARGUMENT_I64:
- case WebAssembly::ARGUMENT_F32:
- case WebAssembly::ARGUMENT_F64:
+ case WebAssembly::ARGUMENT_i32:
+ case WebAssembly::ARGUMENT_i32_S:
+ case WebAssembly::ARGUMENT_i64:
+ case WebAssembly::ARGUMENT_i64_S:
+ case WebAssembly::ARGUMENT_f32:
+ case WebAssembly::ARGUMENT_f32_S:
+ case WebAssembly::ARGUMENT_f64:
+ case WebAssembly::ARGUMENT_f64_S:
case WebAssembly::ARGUMENT_v16i8:
+ case WebAssembly::ARGUMENT_v16i8_S:
case WebAssembly::ARGUMENT_v8i16:
+ case WebAssembly::ARGUMENT_v8i16_S:
case WebAssembly::ARGUMENT_v4i32:
+ case WebAssembly::ARGUMENT_v4i32_S:
+ case WebAssembly::ARGUMENT_v2i64:
+ case WebAssembly::ARGUMENT_v2i64_S:
case WebAssembly::ARGUMENT_v4f32:
+ case WebAssembly::ARGUMENT_v4f32_S:
+ case WebAssembly::ARGUMENT_v2f64:
+ case WebAssembly::ARGUMENT_v2f64_S:
return true;
default:
return false;
@@ -44,9 +56,15 @@ bool WebAssembly::isArgument(const MachineInstr &MI) {
bool WebAssembly::isCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::COPY_I32:
+ case WebAssembly::COPY_I32_S:
case WebAssembly::COPY_I64:
+ case WebAssembly::COPY_I64_S:
case WebAssembly::COPY_F32:
+ case WebAssembly::COPY_F32_S:
case WebAssembly::COPY_F64:
+ case WebAssembly::COPY_F64_S:
+ case WebAssembly::COPY_V128:
+ case WebAssembly::COPY_V128_S:
return true;
default:
return false;
@@ -56,9 +74,15 @@ bool WebAssembly::isCopy(const MachineInstr &MI) {
bool WebAssembly::isTee(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::TEE_I32:
+ case WebAssembly::TEE_I32_S:
case WebAssembly::TEE_I64:
+ case WebAssembly::TEE_I64_S:
case WebAssembly::TEE_F32:
+ case WebAssembly::TEE_F32_S:
case WebAssembly::TEE_F64:
+ case WebAssembly::TEE_F64_S:
+ case WebAssembly::TEE_V128:
+ case WebAssembly::TEE_V128_S:
return true;
default:
return false;
@@ -81,15 +105,29 @@ bool WebAssembly::isChild(const MachineInstr &MI,
bool WebAssembly::isCallDirect(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CALL_VOID:
+ case WebAssembly::CALL_VOID_S:
case WebAssembly::CALL_I32:
+ case WebAssembly::CALL_I32_S:
case WebAssembly::CALL_I64:
+ case WebAssembly::CALL_I64_S:
case WebAssembly::CALL_F32:
+ case WebAssembly::CALL_F32_S:
case WebAssembly::CALL_F64:
+ case WebAssembly::CALL_F64_S:
case WebAssembly::CALL_v16i8:
+ case WebAssembly::CALL_v16i8_S:
case WebAssembly::CALL_v8i16:
+ case WebAssembly::CALL_v8i16_S:
case WebAssembly::CALL_v4i32:
+ case WebAssembly::CALL_v4i32_S:
+ case WebAssembly::CALL_v2i64:
+ case WebAssembly::CALL_v2i64_S:
case WebAssembly::CALL_v4f32:
+ case WebAssembly::CALL_v4f32_S:
+ case WebAssembly::CALL_v2f64:
+ case WebAssembly::CALL_v2f64_S:
case WebAssembly::CALL_EXCEPT_REF:
+ case WebAssembly::CALL_EXCEPT_REF_S:
return true;
default:
return false;
@@ -99,15 +137,29 @@ bool WebAssembly::isCallDirect(const MachineInstr &MI) {
bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_VOID_S:
case WebAssembly::CALL_INDIRECT_I32:
+ case WebAssembly::CALL_INDIRECT_I32_S:
case WebAssembly::CALL_INDIRECT_I64:
+ case WebAssembly::CALL_INDIRECT_I64_S:
case WebAssembly::CALL_INDIRECT_F32:
+ case WebAssembly::CALL_INDIRECT_F32_S:
case WebAssembly::CALL_INDIRECT_F64:
+ case WebAssembly::CALL_INDIRECT_F64_S:
case WebAssembly::CALL_INDIRECT_v16i8:
+ case WebAssembly::CALL_INDIRECT_v16i8_S:
case WebAssembly::CALL_INDIRECT_v8i16:
+ case WebAssembly::CALL_INDIRECT_v8i16_S:
case WebAssembly::CALL_INDIRECT_v4i32:
+ case WebAssembly::CALL_INDIRECT_v4i32_S:
+ case WebAssembly::CALL_INDIRECT_v2i64:
+ case WebAssembly::CALL_INDIRECT_v2i64_S:
case WebAssembly::CALL_INDIRECT_v4f32:
+ case WebAssembly::CALL_INDIRECT_v4f32_S:
+ case WebAssembly::CALL_INDIRECT_v2f64:
+ case WebAssembly::CALL_INDIRECT_v2f64_S:
case WebAssembly::CALL_INDIRECT_EXCEPT_REF:
+ case WebAssembly::CALL_INDIRECT_EXCEPT_REF_S:
return true;
default:
return false;
@@ -117,18 +169,54 @@ bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
unsigned WebAssembly::getCalleeOpNo(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CALL_VOID:
+ case WebAssembly::CALL_VOID_S:
case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_VOID_S:
return 0;
case WebAssembly::CALL_I32:
+ case WebAssembly::CALL_I32_S:
case WebAssembly::CALL_I64:
+ case WebAssembly::CALL_I64_S:
case WebAssembly::CALL_F32:
+ case WebAssembly::CALL_F32_S:
case WebAssembly::CALL_F64:
+ case WebAssembly::CALL_F64_S:
+ case WebAssembly::CALL_v16i8:
+ case WebAssembly::CALL_v16i8_S:
+ case WebAssembly::CALL_v8i16:
+ case WebAssembly::CALL_v8i16_S:
+ case WebAssembly::CALL_v4i32:
+ case WebAssembly::CALL_v4i32_S:
+ case WebAssembly::CALL_v2i64:
+ case WebAssembly::CALL_v2i64_S:
+ case WebAssembly::CALL_v4f32:
+ case WebAssembly::CALL_v4f32_S:
+ case WebAssembly::CALL_v2f64:
+ case WebAssembly::CALL_v2f64_S:
case WebAssembly::CALL_EXCEPT_REF:
+ case WebAssembly::CALL_EXCEPT_REF_S:
case WebAssembly::CALL_INDIRECT_I32:
+ case WebAssembly::CALL_INDIRECT_I32_S:
case WebAssembly::CALL_INDIRECT_I64:
+ case WebAssembly::CALL_INDIRECT_I64_S:
case WebAssembly::CALL_INDIRECT_F32:
+ case WebAssembly::CALL_INDIRECT_F32_S:
case WebAssembly::CALL_INDIRECT_F64:
+ case WebAssembly::CALL_INDIRECT_F64_S:
+ case WebAssembly::CALL_INDIRECT_v16i8:
+ case WebAssembly::CALL_INDIRECT_v16i8_S:
+ case WebAssembly::CALL_INDIRECT_v8i16:
+ case WebAssembly::CALL_INDIRECT_v8i16_S:
+ case WebAssembly::CALL_INDIRECT_v4i32:
+ case WebAssembly::CALL_INDIRECT_v4i32_S:
+ case WebAssembly::CALL_INDIRECT_v2i64:
+ case WebAssembly::CALL_INDIRECT_v2i64_S:
+ case WebAssembly::CALL_INDIRECT_v4f32:
+ case WebAssembly::CALL_INDIRECT_v4f32_S:
+ case WebAssembly::CALL_INDIRECT_v2f64:
+ case WebAssembly::CALL_INDIRECT_v2f64_S:
case WebAssembly::CALL_INDIRECT_EXCEPT_REF:
+ case WebAssembly::CALL_INDIRECT_EXCEPT_REF_S:
return 1;
default:
llvm_unreachable("Not a call instruction");
@@ -138,11 +226,17 @@ unsigned WebAssembly::getCalleeOpNo(const MachineInstr &MI) {
bool WebAssembly::isMarker(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::BLOCK:
+ case WebAssembly::BLOCK_S:
case WebAssembly::END_BLOCK:
+ case WebAssembly::END_BLOCK_S:
case WebAssembly::LOOP:
+ case WebAssembly::LOOP_S:
case WebAssembly::END_LOOP:
+ case WebAssembly::END_LOOP_S:
case WebAssembly::TRY:
+ case WebAssembly::TRY_S:
case WebAssembly::END_TRY:
+ case WebAssembly::END_TRY_S:
return true;
default:
return false;
@@ -152,7 +246,9 @@ bool WebAssembly::isMarker(const MachineInstr &MI) {
bool WebAssembly::isThrow(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::THROW_I32:
+ case WebAssembly::THROW_I32_S:
case WebAssembly::THROW_I64:
+ case WebAssembly::THROW_I64_S:
return true;
default:
return false;
@@ -162,7 +258,9 @@ bool WebAssembly::isThrow(const MachineInstr &MI) {
bool WebAssembly::isRethrow(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::RETHROW:
+ case WebAssembly::RETHROW_S:
case WebAssembly::RETHROW_TO_CALLER:
+ case WebAssembly::RETHROW_TO_CALLER_S:
return true;
default:
return false;
@@ -172,8 +270,11 @@ bool WebAssembly::isRethrow(const MachineInstr &MI) {
bool WebAssembly::isCatch(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CATCH_I32:
+ case WebAssembly::CATCH_I32_S:
case WebAssembly::CATCH_I64:
+ case WebAssembly::CATCH_I64_S:
case WebAssembly::CATCH_ALL:
+ case WebAssembly::CATCH_ALL_S:
return true;
default:
return false;
@@ -183,8 +284,11 @@ bool WebAssembly::isCatch(const MachineInstr &MI) {
bool WebAssembly::mayThrow(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::THROW_I32:
+ case WebAssembly::THROW_I32_S:
case WebAssembly::THROW_I64:
+ case WebAssembly::THROW_I64_S:
case WebAssembly::RETHROW:
+ case WebAssembly::RETHROW_S:
return true;
}
if (isCallIndirect(MI))
@@ -212,7 +316,9 @@ bool WebAssembly::isCatchTerminatePad(const MachineBasicBlock &MBB) {
bool SeenCatch = false;
for (auto &MI : MBB) {
if (MI.getOpcode() == WebAssembly::CATCH_I32 ||
- MI.getOpcode() == WebAssembly::CATCH_I64)
+ MI.getOpcode() == WebAssembly::CATCH_I64 ||
+ MI.getOpcode() == WebAssembly::CATCH_I32_S ||
+ MI.getOpcode() == WebAssembly::CATCH_I64_S)
SeenCatch = true;
if (SeenCatch && MI.isCall()) {
const MachineOperand &CalleeOp = MI.getOperand(getCalleeOpNo(MI));
@@ -229,7 +335,8 @@ bool WebAssembly::isCatchAllTerminatePad(const MachineBasicBlock &MBB) {
return false;
bool SeenCatchAll = false;
for (auto &MI : MBB) {
- if (MI.getOpcode() == WebAssembly::CATCH_ALL)
+ if (MI.getOpcode() == WebAssembly::CATCH_ALL ||
+ MI.getOpcode() == WebAssembly::CATCH_ALL_S)
SeenCatchAll = true;
if (SeenCatchAll && MI.isCall()) {
const MachineOperand &CalleeOp = MI.getOperand(getCalleeOpNo(MI));
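Since isCallDirect() and getCalleeOpNo() now also cover the _S (stack-form) opcodes, callers can classify instructions without caring which form they are looking at. A hypothetical usage sketch, not part of the patch:

#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print the callee of every direct call in a block; register-based and
// stack-based (_S) call opcodes are handled identically by the helpers.
static void printDirectCallees(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB) {
    if (!WebAssembly::isCallDirect(MI))
      continue;
    const MachineOperand &Callee =
        MI.getOperand(WebAssembly::getCalleeOpNo(MI));
    if (Callee.isGlobal())
      errs() << "direct call to " << Callee.getGlobal()->getName() << "\n";
  }
}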
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8b7b250e1a09..899b50d0f78f 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -804,8 +804,8 @@ private:
return Parser.Error(L, Msg, Range);
}
- std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
- Error(Loc, Msg);
+ std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
+ Error(Loc, Msg, R);
return nullptr;
}
@@ -835,7 +835,10 @@ private:
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End);
- std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc MemStart);
+ std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
+ const MCExpr *&Disp,
+ const SMLoc &StartLoc,
+ SMLoc &EndLoc);
bool ParseIntelMemoryOperandSize(unsigned &Size);
std::unique_ptr<X86Operand>
@@ -852,6 +855,7 @@ private:
bool parseDirectiveFPOSetFrame(SMLoc L);
bool parseDirectiveFPOPushReg(SMLoc L);
bool parseDirectiveFPOStackAlloc(SMLoc L);
+ bool parseDirectiveFPOStackAlign(SMLoc L);
bool parseDirectiveFPOEndPrologue(SMLoc L);
bool parseDirectiveFPOEndProc(SMLoc L);
bool parseDirectiveFPOData(SMLoc L);
@@ -1010,8 +1014,7 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
// and then only in non-64-bit modes.
if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
(Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
- BaseReg != X86::SI && BaseReg != X86::DI)) &&
- BaseReg != X86::DX) {
+ BaseReg != X86::SI && BaseReg != X86::DI))) {
ErrMsg = "invalid 16-bit base register";
return true;
}
@@ -1102,10 +1105,13 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (RegNo == X86::RIZ || RegNo == X86::RIP ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
- X86II::isX86_64ExtendedReg(RegNo))
- return Error(StartLoc, "register %"
- + Tok.getString() + " is only available in 64-bit mode",
+ X86II::isX86_64ExtendedReg(RegNo)) {
+ StringRef RegName = Tok.getString();
+ Parser.Lex(); // Eat register name.
+ return Error(StartLoc,
+ "register %" + RegName + " is only available in 64-bit mode",
SMRange(StartLoc, EndLoc));
+ }
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
@@ -1935,49 +1941,61 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
- default:
- // Parse a memory operand with no segment register.
- return ParseMemOperand(0, Parser.getTok().getLoc());
- case AsmToken::Percent: {
- // Read the register.
- unsigned RegNo;
- SMLoc Start, End;
- if (ParseRegister(RegNo, Start, End)) return nullptr;
- if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
- Error(Start, "%eiz and %riz can only be used as index registers",
- SMRange(Start, End));
- return nullptr;
- }
- if (RegNo == X86::RIP) {
- Error(Start, "%rip can only be used as a base register",
- SMRange(Start, End));
- return nullptr;
- }
-
- // If this is a segment register followed by a ':', then this is the start
- // of a memory reference, otherwise this is a normal register reference.
- if (getLexer().isNot(AsmToken::Colon))
- return X86Operand::CreateReg(RegNo, Start, End);
-
- if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
- return ErrorOperand(Start, "invalid segment register");
-
- getParser().Lex(); // Eat the colon.
- return ParseMemOperand(RegNo, Start);
- }
case AsmToken::Dollar: {
- // $42 -> immediate.
+ // $42 or $ID -> immediate.
SMLoc Start = Parser.getTok().getLoc(), End;
Parser.Lex();
const MCExpr *Val;
- if (getParser().parseExpression(Val, End))
+ // This is an immediate, so we should not parse a register. Do a precheck
+ // for '%' to supersede intra-register parse errors.
+ SMLoc L = Parser.getTok().getLoc();
+ if (check(getLexer().is(AsmToken::Percent), L,
+ "expected immediate expression") ||
+ getParser().parseExpression(Val, End) ||
+ check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
return nullptr;
return X86Operand::CreateImm(Val, Start, End);
}
- case AsmToken::LCurly:{
+ case AsmToken::LCurly: {
SMLoc Start = Parser.getTok().getLoc();
return ParseRoundingModeOp(Start);
}
+ default: {
+ // This is a memory operand or a register. Parsing is complicated because a
+ // '(' may be part of an immediate expression or of the addressing-mode
+ // block, and because an assembler-level variable may refer either to a
+ // register or to an immediate expression.
+
+ SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
+ const MCExpr *Expr = nullptr;
+ unsigned Reg = 0;
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // No '(' so this is either a displacement expression or a register.
+ if (Parser.parseExpression(Expr, EndLoc))
+ return nullptr;
+ if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
+ // Segment Register. Reset Expr and copy value to register.
+ Expr = nullptr;
+ Reg = RE->getRegNo();
+
+ // Sanity check register.
+ if (Reg == X86::EIZ || Reg == X86::RIZ)
+ return ErrorOperand(
+ Loc, "%eiz and %riz can only be used as index registers",
+ SMRange(Loc, EndLoc));
+ if (Reg == X86::RIP)
+ return ErrorOperand(Loc, "%rip can only be used as a base register",
+ SMRange(Loc, EndLoc));
+ // Registers that are not segment prefixes are returned immediately.
+ if (!Parser.parseOptionalToken(AsmToken::Colon))
+ return X86Operand::CreateReg(Reg, Loc, EndLoc);
+ if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
+ return ErrorOperand(Loc, "invalid segment register");
+ }
+ }
+ // This is a Memory operand.
+ return ParseMemOperand(Reg, Expr, Loc, EndLoc);
+ }
}
}
@@ -2086,199 +2104,201 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
return false;
}
-/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
-/// has already been parsed if present.
+/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
+/// has already been parsed if present, and Disp may already hold a parsed displacement.
std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
- SMLoc MemStart) {
-
+ const MCExpr *&Disp,
+ const SMLoc &StartLoc,
+ SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
- // We have to disambiguate a parenthesized expression "(4+5)" from the start
- // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
- // only way to do this without lookahead is to eat the '(' and see what is
- // after it.
- const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
- if (getLexer().isNot(AsmToken::LParen)) {
- SMLoc ExprEnd;
- if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
- // Disp may be a variable, handle register values.
- if (auto *RE = dyn_cast<X86MCExpr>(Disp))
- return X86Operand::CreateReg(RE->getRegNo(), MemStart, ExprEnd);
-
- // After parsing the base expression we could either have a parenthesized
- // memory address or not. If not, return now. If so, eat the (.
- if (getLexer().isNot(AsmToken::LParen)) {
- // Unless we have a segment register, treat this as an immediate.
- if (SegReg == 0)
- return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
- return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
- MemStart, ExprEnd);
+ SMLoc Loc;
+ // Based on the values passed in, we are in one of these cases (the current
+ // position is marked with (*)):
+
+ // 1. seg : * disp (base-index-scale-expr)
+ // 2. seg : *(disp) (base-index-scale-expr)
+ // 3. seg : *(base-index-scale-expr)
+ // 4. disp *(base-index-scale-expr)
+ // 5. *(disp) (base-index-scale-expr)
+ // 6. *(base-index-scale-expr)
+ // 7. disp *
+ // 8. *(disp)
+
+ // If we do not have a displacement yet, check whether we are in case 4 or 6
+ // by testing whether the first object after the parenthesis is a register
+ // (or an identifier referring to a register), then parse the displacement or
+ // default it to 0 as appropriate.
+ auto isAtMemOperand = [this]() {
+ if (this->getLexer().isNot(AsmToken::LParen))
+ return false;
+ AsmToken Buf[2];
+ StringRef Id;
+ auto TokCount = this->getLexer().peekTokens(Buf, true);
+ if (TokCount == 0)
+ return false;
+ switch (Buf[0].getKind()) {
+ case AsmToken::Percent:
+ case AsmToken::Comma:
+ return true;
+ // The cases below effectively peek at an identifier.
+ case AsmToken::At:
+ case AsmToken::Dollar:
+ if ((TokCount > 1) &&
+ (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
+ (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
+ Id = StringRef(Buf[0].getLoc().getPointer(),
+ Buf[1].getIdentifier().size() + 1);
+ break;
+ case AsmToken::Identifier:
+ case AsmToken::String:
+ Id = Buf[0].getIdentifier();
+ break;
+ default:
+ return false;
}
+ // We have an ID. Check if it is bound to a register.
+ if (!Id.empty()) {
+ MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
+ if (Sym->isVariable()) {
+ auto V = Sym->getVariableValue(/*SetUsed*/ false);
+ return isa<X86MCExpr>(V);
+ }
+ }
+ return false;
+ };
- // Eat the '('.
- Parser.Lex();
- } else {
- // Okay, we have a '('. We don't know if this is an expression or not, but
- // so we have to eat the ( to see beyond it.
- SMLoc LParenLoc = Parser.getTok().getLoc();
- Parser.Lex(); // Eat the '('.
-
- if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
- // Nothing to do here, fall into the code below with the '(' part of the
- // memory operand consumed.
- } else {
- SMLoc ExprEnd;
- getLexer().UnLex(AsmToken(AsmToken::LParen, "("));
-
- // It must be either an parenthesized expression, or an expression that
- // begins from a parenthesized expression, parse it now. Example: (1+2) or
- // (1+2)+3
- if (getParser().parseExpression(Disp, ExprEnd))
+ if (!Disp) {
+ // Parse immediate if we're not at a mem operand yet.
+ if (!isAtMemOperand()) {
+ if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
return nullptr;
+ assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
+ } else {
+ // Disp is implicitly zero if we haven't parsed it yet.
+ Disp = MCConstantExpr::create(0, Parser.getContext());
+ }
+ }
- // After parsing the base expression we could either have a parenthesized
- // memory address or not. If not, return now. If so, eat the (.
- if (getLexer().isNot(AsmToken::LParen)) {
- // Unless we have a segment register, treat this as an immediate.
- if (SegReg == 0)
- return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
- ExprEnd);
- return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
- MemStart, ExprEnd);
- }
+ // We are now either at the end of the operand or at the '(' at the start of a
+ // base-index-scale-expr.
- // Eat the '('.
- Parser.Lex();
- }
+ if (!parseOptionalToken(AsmToken::LParen)) {
+ if (SegReg == 0)
+ return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
+ return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
+ StartLoc, EndLoc);
}
- // If we reached here, then we just ate the ( of the memory operand. Process
+ // If we reached here, the '(' has already been eaten. Now process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
- SMLoc IndexLoc, BaseLoc;
-
- if (getLexer().is(AsmToken::Percent)) {
- SMLoc StartLoc, EndLoc;
- BaseLoc = Parser.getTok().getLoc();
- if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
- if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
- Error(StartLoc, "eiz and riz can only be used as index registers",
- SMRange(StartLoc, EndLoc));
+ SMLoc BaseLoc = getLexer().getLoc();
+ const MCExpr *E;
+ StringRef ErrMsg;
+
+ // Parse BaseReg if one is provided.
+ if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
+ if (Parser.parseExpression(E, EndLoc) ||
+ check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
return nullptr;
- }
- }
- if (getLexer().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma.
- IndexLoc = Parser.getTok().getLoc();
+ // Sanity check register.
+ BaseReg = cast<X86MCExpr>(E)->getRegNo();
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
+ return ErrorOperand(BaseLoc,
+ "eiz and riz can only be used as index registers",
+ SMRange(BaseLoc, EndLoc));
+ }
+ if (parseOptionalToken(AsmToken::Comma)) {
// Following the comma we should have either an index register, or a scale
 // value. We don't support the latter form, but we want to parse it
// correctly.
//
- // Not that even though it would be completely consistent to support syntax
- // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
- if (getLexer().is(AsmToken::Percent)) {
- SMLoc L;
- if (ParseRegister(IndexReg, L, L))
+ // Even though it would be completely consistent to support syntax like
+ // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
+ if (getLexer().isNot(AsmToken::RParen)) {
+ if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
return nullptr;
- if (BaseReg == X86::RIP) {
- Error(IndexLoc, "%rip as base register can not have an index register");
- return nullptr;
- }
- if (IndexReg == X86::RIP) {
- Error(IndexLoc, "%rip is not allowed as an index register");
- return nullptr;
- }
-
- if (getLexer().isNot(AsmToken::RParen)) {
- // Parse the scale amount:
- // ::= ',' [scale-expression]
- if (parseToken(AsmToken::Comma, "expected comma in scale expression"))
- return nullptr;
- if (getLexer().isNot(AsmToken::RParen)) {
- SMLoc Loc = Parser.getTok().getLoc();
-
- int64_t ScaleVal;
- if (getParser().parseAbsoluteExpression(ScaleVal)){
- Error(Loc, "expected scale expression");
- return nullptr;
- }
-
- // Validate the scale amount.
- if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
- ScaleVal != 1) {
- Error(Loc, "scale factor in 16-bit address must be 1");
- return nullptr;
- }
- if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
- ScaleVal != 8) {
- Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
- return nullptr;
+ if (!isa<X86MCExpr>(E)) {
+ // We've parsed an unexpected scale value instead of an index
+ // register. Interpret it as an absolute expression.
+ int64_t ScaleVal;
+ if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
+ return ErrorOperand(Loc, "expected absolute expression");
+ if (ScaleVal != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
+ } else { // IndexReg Found.
+ IndexReg = cast<X86MCExpr>(E)->getRegNo();
+
+ if (BaseReg == X86::RIP)
+ return ErrorOperand(
+ Loc, "%rip as base register can not have an index register");
+ if (IndexReg == X86::RIP)
+ return ErrorOperand(Loc, "%rip is not allowed as an index register");
+
+ if (parseOptionalToken(AsmToken::Comma)) {
+ // Parse the scale amount:
+ // ::= ',' [scale-expression]
+
+ // A scale amount without an index is ignored.
+ if (getLexer().isNot(AsmToken::RParen)) {
+ int64_t ScaleVal;
+ if (Parser.parseTokenLoc(Loc) ||
+ Parser.parseAbsoluteExpression(ScaleVal))
+ return ErrorOperand(Loc, "expected scale expression");
+ Scale = (unsigned)ScaleVal;
+ // Validate the scale amount.
+ if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
+ Scale != 1)
+ return ErrorOperand(Loc,
+ "scale factor in 16-bit address must be 1");
+ if (checkScale(Scale, ErrMsg))
+ return ErrorOperand(Loc, ErrMsg);
}
- Scale = (unsigned)ScaleVal;
}
}
- } else if (getLexer().isNot(AsmToken::RParen)) {
- // A scale amount without an index is ignored.
- // index.
- SMLoc Loc = Parser.getTok().getLoc();
-
- int64_t Value;
- if (getParser().parseAbsoluteExpression(Value))
- return nullptr;
-
- if (Value != 1)
- Warning(Loc, "scale factor without index register is ignored");
- Scale = 1;
}
}
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
- SMLoc MemEnd = Parser.getTok().getEndLoc();
if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
return nullptr;
- // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
- // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
- // documented form in various unofficial manuals, so a lot of code uses it.
- if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 &&
- SegReg == 0 && isa<MCConstantExpr>(Disp) &&
- cast<MCConstantExpr>(Disp)->getValue() == 0)
+ // Support the otherwise illegal operand (%dx), which appears in examples in
+ // various unofficial manuals (e.g. "out[s]?[bwl]? %al, (%dx)"). Such DX
+ // variants are marked separately and fixed up only in special cases.
+ if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
+ isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
- StringRef ErrMsg;
if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
- ErrMsg)) {
- Error(BaseLoc, ErrMsg);
- return nullptr;
- }
+ ErrMsg))
+ return ErrorOperand(BaseLoc, ErrMsg);
if (SegReg || BaseReg || IndexReg)
return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
- IndexReg, Scale, MemStart, MemEnd);
- return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
+ IndexReg, Scale, StartLoc, EndLoc);
+ return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
}
// Parse either a standard primary expression or a register.
bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
- if (Parser.parsePrimaryExpr(Res, EndLoc)) {
+ // See if this is a register first.
+ if (getTok().is(AsmToken::Percent) ||
+ (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
+ MatchRegisterName(Parser.getTok().getString()))) {
SMLoc StartLoc = Parser.getTok().getLoc();
- // Normal Expression parse fails, check if it could be a register.
unsigned RegNo;
- bool TryRegParse =
- getTok().is(AsmToken::Percent) ||
- (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier));
- if (!TryRegParse || ParseRegister(RegNo, StartLoc, EndLoc))
+ if (ParseRegister(RegNo, StartLoc, EndLoc))
return true;
- // Clear previous parse error and return correct expression.
- Parser.clearPendingErrors();
Res = X86MCExpr::create(RegNo, Parser.getContext());
return false;
}
-
- return false;
+ return Parser.parsePrimaryExpr(Res, EndLoc);
}
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -3282,7 +3302,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
- getParser().setParsingInlineAsm(false);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "prefix")
Parser.Lex();
@@ -3295,7 +3314,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
} else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
- getParser().setParsingInlineAsm(true);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "noprefix")
Parser.Lex();
@@ -3315,6 +3333,8 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveFPOPushReg(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_stackalloc")
return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
+ else if (IDVal == ".cv_fpo_stackalign")
+ return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_endprologue")
return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_endproc")
@@ -3429,6 +3449,16 @@ bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
return getTargetStreamer().emitFPOStackAlloc(Offset, L);
}
+// .cv_fpo_stackalign 8
+bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ int64_t Offset;
+ if (Parser.parseIntToken(Offset, "expected offset") ||
+ Parser.parseEOL("unexpected tokens"))
+ return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
+ return getTargetStreamer().emitFPOStackAlign(Offset, L);
+}
+
// .cv_fpo_endprologue
bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
MCAsmParser &Parser = getParser();
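Every form accepted by ParseMemOperand() boils down to a segment plus the usual base + index*scale + disp address computation. A small standalone sketch of that arithmetic with illustrative values (the hardware adds the segment base separately):

#include <cassert>
#include <cstdint>

// Effective address of an AT&T operand 'disp(base,index,scale)'.
static uint64_t effectiveAddress(uint64_t Base, uint64_t Index,
                                 unsigned Scale, int64_t Disp) {
  assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
  return Base + Index * Scale + static_cast<uint64_t>(Disp);
}

int main() {
  // movl 16(%rdi,%rcx,8), %eax  ->  address = %rdi + %rcx*8 + 16
  assert(effectiveAddress(0x1000, 2, 8, 16) == 0x1020);
  return 0;
}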
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 1ac304f3be03..54d550b60652 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -983,45 +983,18 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
insn->opcode == 0xE3)
attrMask ^= ATTR_ADSIZE;
- /*
- * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix
- * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes
- */
-
- if ((insn->mode == MODE_64BIT) && insn->hasOpSize) {
- switch (insn->opcode) {
- case 0xE8:
- case 0xE9:
- // Take care of psubsb and other mmx instructions.
- if (insn->opcodeType == ONEBYTE) {
- attrMask ^= ATTR_OPSIZE;
- insn->immediateSize = 4;
- insn->displacementSize = 4;
- }
- break;
- case 0x82:
- case 0x83:
- case 0x84:
- case 0x85:
- case 0x86:
- case 0x87:
- case 0x88:
- case 0x89:
- case 0x8A:
- case 0x8B:
- case 0x8C:
- case 0x8D:
- case 0x8E:
- case 0x8F:
- // Take care of lea and three byte ops.
- if (insn->opcodeType == TWOBYTE) {
- attrMask ^= ATTR_OPSIZE;
- insn->immediateSize = 4;
- insn->displacementSize = 4;
- }
- break;
- }
- }
+ // If we're in 16-bit mode and this is one of the relative jumps and the
+ // opsize prefix isn't present, we need to force the opsize attribute since
+ // the prefix semantics are inverted relative to 32-bit mode.
+ if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
+ insn->opcodeType == ONEBYTE &&
+ (insn->opcode == 0xE8 || insn->opcode == 0xE9))
+ attrMask |= ATTR_OPSIZE;
+
+ if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
+ insn->opcodeType == TWOBYTE &&
+ insn->opcode >= 0x80 && insn->opcode <= 0x8F)
+ attrMask |= ATTR_OPSIZE;
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
@@ -1420,7 +1393,7 @@ static int readModRM(struct InternalInstruction* insn) {
break;
case 0x1:
insn->displacementSize = 1;
- /* FALLTHROUGH */
+ LLVM_FALLTHROUGH;
case 0x2:
insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
switch (rm & 7) {
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 0e4c4398e49d..64e6fb9f0375 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -540,7 +540,6 @@ protected:
unsigned InstrOffset = 0;
unsigned StackAdjust = 0;
unsigned StackSize = 0;
- unsigned PrevStackSize = 0;
unsigned NumDefCFAOffsets = 0;
for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
@@ -588,7 +587,6 @@ protected:
// L0:
// .cfi_def_cfa_offset 80
//
- PrevStackSize = StackSize;
StackSize = std::abs(Inst.getOffset()) / StackDivide;
++NumDefCFAOffsets;
break;
@@ -635,16 +633,6 @@ protected:
CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
} else {
- // If the amount of the stack allocation is the size of a register, then
- // we "push" the RAX/EAX register onto the stack instead of adjusting the
- // stack pointer with a SUB instruction. We don't support the push of the
- // RAX/EAX register with compact unwind. So we check for that situation
- // here.
- if ((NumDefCFAOffsets == SavedRegIdx + 1 &&
- StackSize - PrevStackSize == 1) ||
- (Instrs.size() == 1 && NumDefCFAOffsets == 1 && StackSize == 2))
- return CU::UNWIND_MODE_DWARF;
-
SubtractInstrIdx += InstrOffset;
++StackAdjust;
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 497e29fe628e..c85ce9bbd5a4 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -231,6 +231,11 @@ namespace X86II {
/// to be an absolute symbol in range [0,128), so we can use the @ABS8
/// symbol modifier.
MO_ABS8,
+
+ /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// stub symbols on Windows.
+ MO_COFFSTUB,
};
enum : uint64_t {
@@ -261,12 +266,12 @@ namespace X86II {
RawFrmSrc = 4,
/// RawFrmDst - This form is for instructions that use the destination index
- /// register DI/EDI/ESI.
+ /// register DI/EDI/RDI.
RawFrmDst = 5,
- /// RawFrmSrc - This form is for instructions that use the source index
- /// register SI/ESI/ERI with a possible segment override, and also the
- /// destination index register DI/ESI/RDI.
+ /// RawFrmDstSrc - This form is for instructions that use the source index
+ /// register SI/ESI/RSI with a possible segment override, and also the
+ /// destination index register DI/EDI/RDI.
RawFrmDstSrc = 6,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index f1d15e66918b..ea4aaf14223d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
#include "X86GenRegisterInfo.inc"
#define GET_INSTRINFO_MC_DESC
-#define GET_GENINSTRINFO_MC_HELPERS
+#define GET_INSTRINFO_MC_HELPERS
#include "X86GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
@@ -81,120 +81,193 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
codeview::RegisterId CVReg;
MCPhysReg Reg;
} RegMap[] = {
- { codeview::RegisterId::CVRegAL, X86::AL},
- { codeview::RegisterId::CVRegCL, X86::CL},
- { codeview::RegisterId::CVRegDL, X86::DL},
- { codeview::RegisterId::CVRegBL, X86::BL},
- { codeview::RegisterId::CVRegAH, X86::AH},
- { codeview::RegisterId::CVRegCH, X86::CH},
- { codeview::RegisterId::CVRegDH, X86::DH},
- { codeview::RegisterId::CVRegBH, X86::BH},
- { codeview::RegisterId::CVRegAX, X86::AX},
- { codeview::RegisterId::CVRegCX, X86::CX},
- { codeview::RegisterId::CVRegDX, X86::DX},
- { codeview::RegisterId::CVRegBX, X86::BX},
- { codeview::RegisterId::CVRegSP, X86::SP},
- { codeview::RegisterId::CVRegBP, X86::BP},
- { codeview::RegisterId::CVRegSI, X86::SI},
- { codeview::RegisterId::CVRegDI, X86::DI},
- { codeview::RegisterId::CVRegEAX, X86::EAX},
- { codeview::RegisterId::CVRegECX, X86::ECX},
- { codeview::RegisterId::CVRegEDX, X86::EDX},
- { codeview::RegisterId::CVRegEBX, X86::EBX},
- { codeview::RegisterId::CVRegESP, X86::ESP},
- { codeview::RegisterId::CVRegEBP, X86::EBP},
- { codeview::RegisterId::CVRegESI, X86::ESI},
- { codeview::RegisterId::CVRegEDI, X86::EDI},
-
- { codeview::RegisterId::CVRegEFLAGS, X86::EFLAGS},
-
- { codeview::RegisterId::CVRegST0, X86::FP0},
- { codeview::RegisterId::CVRegST1, X86::FP1},
- { codeview::RegisterId::CVRegST2, X86::FP2},
- { codeview::RegisterId::CVRegST3, X86::FP3},
- { codeview::RegisterId::CVRegST4, X86::FP4},
- { codeview::RegisterId::CVRegST5, X86::FP5},
- { codeview::RegisterId::CVRegST6, X86::FP6},
- { codeview::RegisterId::CVRegST7, X86::FP7},
-
- { codeview::RegisterId::CVRegXMM0, X86::XMM0},
- { codeview::RegisterId::CVRegXMM1, X86::XMM1},
- { codeview::RegisterId::CVRegXMM2, X86::XMM2},
- { codeview::RegisterId::CVRegXMM3, X86::XMM3},
- { codeview::RegisterId::CVRegXMM4, X86::XMM4},
- { codeview::RegisterId::CVRegXMM5, X86::XMM5},
- { codeview::RegisterId::CVRegXMM6, X86::XMM6},
- { codeview::RegisterId::CVRegXMM7, X86::XMM7},
-
- { codeview::RegisterId::CVRegXMM8, X86::XMM8},
- { codeview::RegisterId::CVRegXMM9, X86::XMM9},
- { codeview::RegisterId::CVRegXMM10, X86::XMM10},
- { codeview::RegisterId::CVRegXMM11, X86::XMM11},
- { codeview::RegisterId::CVRegXMM12, X86::XMM12},
- { codeview::RegisterId::CVRegXMM13, X86::XMM13},
- { codeview::RegisterId::CVRegXMM14, X86::XMM14},
- { codeview::RegisterId::CVRegXMM15, X86::XMM15},
-
- { codeview::RegisterId::CVRegSIL, X86::SIL},
- { codeview::RegisterId::CVRegDIL, X86::DIL},
- { codeview::RegisterId::CVRegBPL, X86::BPL},
- { codeview::RegisterId::CVRegSPL, X86::SPL},
- { codeview::RegisterId::CVRegRAX, X86::RAX},
- { codeview::RegisterId::CVRegRBX, X86::RBX},
- { codeview::RegisterId::CVRegRCX, X86::RCX},
- { codeview::RegisterId::CVRegRDX, X86::RDX},
- { codeview::RegisterId::CVRegRSI, X86::RSI},
- { codeview::RegisterId::CVRegRDI, X86::RDI},
- { codeview::RegisterId::CVRegRBP, X86::RBP},
- { codeview::RegisterId::CVRegRSP, X86::RSP},
- { codeview::RegisterId::CVRegR8, X86::R8},
- { codeview::RegisterId::CVRegR9, X86::R9},
- { codeview::RegisterId::CVRegR10, X86::R10},
- { codeview::RegisterId::CVRegR11, X86::R11},
- { codeview::RegisterId::CVRegR12, X86::R12},
- { codeview::RegisterId::CVRegR13, X86::R13},
- { codeview::RegisterId::CVRegR14, X86::R14},
- { codeview::RegisterId::CVRegR15, X86::R15},
- { codeview::RegisterId::CVRegR8B, X86::R8B},
- { codeview::RegisterId::CVRegR9B, X86::R9B},
- { codeview::RegisterId::CVRegR10B, X86::R10B},
- { codeview::RegisterId::CVRegR11B, X86::R11B},
- { codeview::RegisterId::CVRegR12B, X86::R12B},
- { codeview::RegisterId::CVRegR13B, X86::R13B},
- { codeview::RegisterId::CVRegR14B, X86::R14B},
- { codeview::RegisterId::CVRegR15B, X86::R15B},
- { codeview::RegisterId::CVRegR8W, X86::R8W},
- { codeview::RegisterId::CVRegR9W, X86::R9W},
- { codeview::RegisterId::CVRegR10W, X86::R10W},
- { codeview::RegisterId::CVRegR11W, X86::R11W},
- { codeview::RegisterId::CVRegR12W, X86::R12W},
- { codeview::RegisterId::CVRegR13W, X86::R13W},
- { codeview::RegisterId::CVRegR14W, X86::R14W},
- { codeview::RegisterId::CVRegR15W, X86::R15W},
- { codeview::RegisterId::CVRegR8D, X86::R8D},
- { codeview::RegisterId::CVRegR9D, X86::R9D},
- { codeview::RegisterId::CVRegR10D, X86::R10D},
- { codeview::RegisterId::CVRegR11D, X86::R11D},
- { codeview::RegisterId::CVRegR12D, X86::R12D},
- { codeview::RegisterId::CVRegR13D, X86::R13D},
- { codeview::RegisterId::CVRegR14D, X86::R14D},
- { codeview::RegisterId::CVRegR15D, X86::R15D},
- { codeview::RegisterId::CVRegAMD64_YMM0, X86::YMM0},
- { codeview::RegisterId::CVRegAMD64_YMM1, X86::YMM1},
- { codeview::RegisterId::CVRegAMD64_YMM2, X86::YMM2},
- { codeview::RegisterId::CVRegAMD64_YMM3, X86::YMM3},
- { codeview::RegisterId::CVRegAMD64_YMM4, X86::YMM4},
- { codeview::RegisterId::CVRegAMD64_YMM5, X86::YMM5},
- { codeview::RegisterId::CVRegAMD64_YMM6, X86::YMM6},
- { codeview::RegisterId::CVRegAMD64_YMM7, X86::YMM7},
- { codeview::RegisterId::CVRegAMD64_YMM8, X86::YMM8},
- { codeview::RegisterId::CVRegAMD64_YMM9, X86::YMM9},
- { codeview::RegisterId::CVRegAMD64_YMM10, X86::YMM10},
- { codeview::RegisterId::CVRegAMD64_YMM11, X86::YMM11},
- { codeview::RegisterId::CVRegAMD64_YMM12, X86::YMM12},
- { codeview::RegisterId::CVRegAMD64_YMM13, X86::YMM13},
- { codeview::RegisterId::CVRegAMD64_YMM14, X86::YMM14},
- { codeview::RegisterId::CVRegAMD64_YMM15, X86::YMM15},
+ {codeview::RegisterId::AL, X86::AL},
+ {codeview::RegisterId::CL, X86::CL},
+ {codeview::RegisterId::DL, X86::DL},
+ {codeview::RegisterId::BL, X86::BL},
+ {codeview::RegisterId::AH, X86::AH},
+ {codeview::RegisterId::CH, X86::CH},
+ {codeview::RegisterId::DH, X86::DH},
+ {codeview::RegisterId::BH, X86::BH},
+ {codeview::RegisterId::AX, X86::AX},
+ {codeview::RegisterId::CX, X86::CX},
+ {codeview::RegisterId::DX, X86::DX},
+ {codeview::RegisterId::BX, X86::BX},
+ {codeview::RegisterId::SP, X86::SP},
+ {codeview::RegisterId::BP, X86::BP},
+ {codeview::RegisterId::SI, X86::SI},
+ {codeview::RegisterId::DI, X86::DI},
+ {codeview::RegisterId::EAX, X86::EAX},
+ {codeview::RegisterId::ECX, X86::ECX},
+ {codeview::RegisterId::EDX, X86::EDX},
+ {codeview::RegisterId::EBX, X86::EBX},
+ {codeview::RegisterId::ESP, X86::ESP},
+ {codeview::RegisterId::EBP, X86::EBP},
+ {codeview::RegisterId::ESI, X86::ESI},
+ {codeview::RegisterId::EDI, X86::EDI},
+
+ {codeview::RegisterId::EFLAGS, X86::EFLAGS},
+
+ {codeview::RegisterId::ST0, X86::FP0},
+ {codeview::RegisterId::ST1, X86::FP1},
+ {codeview::RegisterId::ST2, X86::FP2},
+ {codeview::RegisterId::ST3, X86::FP3},
+ {codeview::RegisterId::ST4, X86::FP4},
+ {codeview::RegisterId::ST5, X86::FP5},
+ {codeview::RegisterId::ST6, X86::FP6},
+ {codeview::RegisterId::ST7, X86::FP7},
+
+ {codeview::RegisterId::XMM0, X86::XMM0},
+ {codeview::RegisterId::XMM1, X86::XMM1},
+ {codeview::RegisterId::XMM2, X86::XMM2},
+ {codeview::RegisterId::XMM3, X86::XMM3},
+ {codeview::RegisterId::XMM4, X86::XMM4},
+ {codeview::RegisterId::XMM5, X86::XMM5},
+ {codeview::RegisterId::XMM6, X86::XMM6},
+ {codeview::RegisterId::XMM7, X86::XMM7},
+
+ {codeview::RegisterId::XMM8, X86::XMM8},
+ {codeview::RegisterId::XMM9, X86::XMM9},
+ {codeview::RegisterId::XMM10, X86::XMM10},
+ {codeview::RegisterId::XMM11, X86::XMM11},
+ {codeview::RegisterId::XMM12, X86::XMM12},
+ {codeview::RegisterId::XMM13, X86::XMM13},
+ {codeview::RegisterId::XMM14, X86::XMM14},
+ {codeview::RegisterId::XMM15, X86::XMM15},
+
+ {codeview::RegisterId::SIL, X86::SIL},
+ {codeview::RegisterId::DIL, X86::DIL},
+ {codeview::RegisterId::BPL, X86::BPL},
+ {codeview::RegisterId::SPL, X86::SPL},
+ {codeview::RegisterId::RAX, X86::RAX},
+ {codeview::RegisterId::RBX, X86::RBX},
+ {codeview::RegisterId::RCX, X86::RCX},
+ {codeview::RegisterId::RDX, X86::RDX},
+ {codeview::RegisterId::RSI, X86::RSI},
+ {codeview::RegisterId::RDI, X86::RDI},
+ {codeview::RegisterId::RBP, X86::RBP},
+ {codeview::RegisterId::RSP, X86::RSP},
+ {codeview::RegisterId::R8, X86::R8},
+ {codeview::RegisterId::R9, X86::R9},
+ {codeview::RegisterId::R10, X86::R10},
+ {codeview::RegisterId::R11, X86::R11},
+ {codeview::RegisterId::R12, X86::R12},
+ {codeview::RegisterId::R13, X86::R13},
+ {codeview::RegisterId::R14, X86::R14},
+ {codeview::RegisterId::R15, X86::R15},
+ {codeview::RegisterId::R8B, X86::R8B},
+ {codeview::RegisterId::R9B, X86::R9B},
+ {codeview::RegisterId::R10B, X86::R10B},
+ {codeview::RegisterId::R11B, X86::R11B},
+ {codeview::RegisterId::R12B, X86::R12B},
+ {codeview::RegisterId::R13B, X86::R13B},
+ {codeview::RegisterId::R14B, X86::R14B},
+ {codeview::RegisterId::R15B, X86::R15B},
+ {codeview::RegisterId::R8W, X86::R8W},
+ {codeview::RegisterId::R9W, X86::R9W},
+ {codeview::RegisterId::R10W, X86::R10W},
+ {codeview::RegisterId::R11W, X86::R11W},
+ {codeview::RegisterId::R12W, X86::R12W},
+ {codeview::RegisterId::R13W, X86::R13W},
+ {codeview::RegisterId::R14W, X86::R14W},
+ {codeview::RegisterId::R15W, X86::R15W},
+ {codeview::RegisterId::R8D, X86::R8D},
+ {codeview::RegisterId::R9D, X86::R9D},
+ {codeview::RegisterId::R10D, X86::R10D},
+ {codeview::RegisterId::R11D, X86::R11D},
+ {codeview::RegisterId::R12D, X86::R12D},
+ {codeview::RegisterId::R13D, X86::R13D},
+ {codeview::RegisterId::R14D, X86::R14D},
+ {codeview::RegisterId::R15D, X86::R15D},
+ {codeview::RegisterId::AMD64_YMM0, X86::YMM0},
+ {codeview::RegisterId::AMD64_YMM1, X86::YMM1},
+ {codeview::RegisterId::AMD64_YMM2, X86::YMM2},
+ {codeview::RegisterId::AMD64_YMM3, X86::YMM3},
+ {codeview::RegisterId::AMD64_YMM4, X86::YMM4},
+ {codeview::RegisterId::AMD64_YMM5, X86::YMM5},
+ {codeview::RegisterId::AMD64_YMM6, X86::YMM6},
+ {codeview::RegisterId::AMD64_YMM7, X86::YMM7},
+ {codeview::RegisterId::AMD64_YMM8, X86::YMM8},
+ {codeview::RegisterId::AMD64_YMM9, X86::YMM9},
+ {codeview::RegisterId::AMD64_YMM10, X86::YMM10},
+ {codeview::RegisterId::AMD64_YMM11, X86::YMM11},
+ {codeview::RegisterId::AMD64_YMM12, X86::YMM12},
+ {codeview::RegisterId::AMD64_YMM13, X86::YMM13},
+ {codeview::RegisterId::AMD64_YMM14, X86::YMM14},
+ {codeview::RegisterId::AMD64_YMM15, X86::YMM15},
+ {codeview::RegisterId::AMD64_YMM16, X86::YMM16},
+ {codeview::RegisterId::AMD64_YMM17, X86::YMM17},
+ {codeview::RegisterId::AMD64_YMM18, X86::YMM18},
+ {codeview::RegisterId::AMD64_YMM19, X86::YMM19},
+ {codeview::RegisterId::AMD64_YMM20, X86::YMM20},
+ {codeview::RegisterId::AMD64_YMM21, X86::YMM21},
+ {codeview::RegisterId::AMD64_YMM22, X86::YMM22},
+ {codeview::RegisterId::AMD64_YMM23, X86::YMM23},
+ {codeview::RegisterId::AMD64_YMM24, X86::YMM24},
+ {codeview::RegisterId::AMD64_YMM25, X86::YMM25},
+ {codeview::RegisterId::AMD64_YMM26, X86::YMM26},
+ {codeview::RegisterId::AMD64_YMM27, X86::YMM27},
+ {codeview::RegisterId::AMD64_YMM28, X86::YMM28},
+ {codeview::RegisterId::AMD64_YMM29, X86::YMM29},
+ {codeview::RegisterId::AMD64_YMM30, X86::YMM30},
+ {codeview::RegisterId::AMD64_YMM31, X86::YMM31},
+ {codeview::RegisterId::AMD64_ZMM0, X86::ZMM0},
+ {codeview::RegisterId::AMD64_ZMM1, X86::ZMM1},
+ {codeview::RegisterId::AMD64_ZMM2, X86::ZMM2},
+ {codeview::RegisterId::AMD64_ZMM3, X86::ZMM3},
+ {codeview::RegisterId::AMD64_ZMM4, X86::ZMM4},
+ {codeview::RegisterId::AMD64_ZMM5, X86::ZMM5},
+ {codeview::RegisterId::AMD64_ZMM6, X86::ZMM6},
+ {codeview::RegisterId::AMD64_ZMM7, X86::ZMM7},
+ {codeview::RegisterId::AMD64_ZMM8, X86::ZMM8},
+ {codeview::RegisterId::AMD64_ZMM9, X86::ZMM9},
+ {codeview::RegisterId::AMD64_ZMM10, X86::ZMM10},
+ {codeview::RegisterId::AMD64_ZMM11, X86::ZMM11},
+ {codeview::RegisterId::AMD64_ZMM12, X86::ZMM12},
+ {codeview::RegisterId::AMD64_ZMM13, X86::ZMM13},
+ {codeview::RegisterId::AMD64_ZMM14, X86::ZMM14},
+ {codeview::RegisterId::AMD64_ZMM15, X86::ZMM15},
+ {codeview::RegisterId::AMD64_ZMM16, X86::ZMM16},
+ {codeview::RegisterId::AMD64_ZMM17, X86::ZMM17},
+ {codeview::RegisterId::AMD64_ZMM18, X86::ZMM18},
+ {codeview::RegisterId::AMD64_ZMM19, X86::ZMM19},
+ {codeview::RegisterId::AMD64_ZMM20, X86::ZMM20},
+ {codeview::RegisterId::AMD64_ZMM21, X86::ZMM21},
+ {codeview::RegisterId::AMD64_ZMM22, X86::ZMM22},
+ {codeview::RegisterId::AMD64_ZMM23, X86::ZMM23},
+ {codeview::RegisterId::AMD64_ZMM24, X86::ZMM24},
+ {codeview::RegisterId::AMD64_ZMM25, X86::ZMM25},
+ {codeview::RegisterId::AMD64_ZMM26, X86::ZMM26},
+ {codeview::RegisterId::AMD64_ZMM27, X86::ZMM27},
+ {codeview::RegisterId::AMD64_ZMM28, X86::ZMM28},
+ {codeview::RegisterId::AMD64_ZMM29, X86::ZMM29},
+ {codeview::RegisterId::AMD64_ZMM30, X86::ZMM30},
+ {codeview::RegisterId::AMD64_ZMM31, X86::ZMM31},
+ {codeview::RegisterId::AMD64_K0, X86::K0},
+ {codeview::RegisterId::AMD64_K1, X86::K1},
+ {codeview::RegisterId::AMD64_K2, X86::K2},
+ {codeview::RegisterId::AMD64_K3, X86::K3},
+ {codeview::RegisterId::AMD64_K4, X86::K4},
+ {codeview::RegisterId::AMD64_K5, X86::K5},
+ {codeview::RegisterId::AMD64_K6, X86::K6},
+ {codeview::RegisterId::AMD64_K7, X86::K7},
+ {codeview::RegisterId::AMD64_XMM16, X86::XMM16},
+ {codeview::RegisterId::AMD64_XMM17, X86::XMM17},
+ {codeview::RegisterId::AMD64_XMM18, X86::XMM18},
+ {codeview::RegisterId::AMD64_XMM19, X86::XMM19},
+ {codeview::RegisterId::AMD64_XMM20, X86::XMM20},
+ {codeview::RegisterId::AMD64_XMM21, X86::XMM21},
+ {codeview::RegisterId::AMD64_XMM22, X86::XMM22},
+ {codeview::RegisterId::AMD64_XMM23, X86::XMM23},
+ {codeview::RegisterId::AMD64_XMM24, X86::XMM24},
+ {codeview::RegisterId::AMD64_XMM25, X86::XMM25},
+ {codeview::RegisterId::AMD64_XMM26, X86::XMM26},
+ {codeview::RegisterId::AMD64_XMM27, X86::XMM27},
+ {codeview::RegisterId::AMD64_XMM28, X86::XMM28},
+ {codeview::RegisterId::AMD64_XMM29, X86::XMM29},
+ {codeview::RegisterId::AMD64_XMM30, X86::XMM30},
+ {codeview::RegisterId::AMD64_XMM31, X86::XMM31},
+
};
for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
@@ -307,83 +380,19 @@ class X86MCInstrAnalysis : public MCInstrAnalysis {
public:
X86MCInstrAnalysis(const MCInstrInfo *MCII) : MCInstrAnalysis(MCII) {}
- bool isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const override;
+#define GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS
+#include "X86GenSubtargetInfo.inc"
+
bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst,
APInt &Mask) const override;
+ std::vector<std::pair<uint64_t, uint64_t>>
+ findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotSectionVA,
+ const Triple &TargetTriple) const override;
};
-bool X86MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const {
- if (STI.getCPU() == "btver2") {
- // Reference: Agner Fog's microarchitecture.pdf - Section 20 "AMD Bobcat and
- // Jaguar pipeline", subsection 8 "Dependency-breaking instructions".
- switch (Inst.getOpcode()) {
- default:
- return false;
- case X86::SUB32rr:
- case X86::SUB64rr:
- case X86::SBB32rr:
- case X86::SBB64rr:
- case X86::XOR32rr:
- case X86::XOR64rr:
- case X86::XORPSrr:
- case X86::XORPDrr:
- case X86::VXORPSrr:
- case X86::VXORPDrr:
- case X86::ANDNPSrr:
- case X86::VANDNPSrr:
- case X86::ANDNPDrr:
- case X86::VANDNPDrr:
- case X86::PXORrr:
- case X86::VPXORrr:
- case X86::PANDNrr:
- case X86::VPANDNrr:
- case X86::PSUBBrr:
- case X86::PSUBWrr:
- case X86::PSUBDrr:
- case X86::PSUBQrr:
- case X86::VPSUBBrr:
- case X86::VPSUBWrr:
- case X86::VPSUBDrr:
- case X86::VPSUBQrr:
- case X86::PCMPEQBrr:
- case X86::PCMPEQWrr:
- case X86::PCMPEQDrr:
- case X86::PCMPEQQrr:
- case X86::VPCMPEQBrr:
- case X86::VPCMPEQWrr:
- case X86::VPCMPEQDrr:
- case X86::VPCMPEQQrr:
- case X86::PCMPGTBrr:
- case X86::PCMPGTWrr:
- case X86::PCMPGTDrr:
- case X86::PCMPGTQrr:
- case X86::VPCMPGTBrr:
- case X86::VPCMPGTWrr:
- case X86::VPCMPGTDrr:
- case X86::VPCMPGTQrr:
- case X86::MMX_PXORirr:
- case X86::MMX_PANDNirr:
- case X86::MMX_PSUBBirr:
- case X86::MMX_PSUBDirr:
- case X86::MMX_PSUBQirr:
- case X86::MMX_PSUBWirr:
- case X86::MMX_PCMPGTBirr:
- case X86::MMX_PCMPGTDirr:
- case X86::MMX_PCMPGTWirr:
- case X86::MMX_PCMPEQBirr:
- case X86::MMX_PCMPEQDirr:
- case X86::MMX_PCMPEQWirr:
- return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg();
- case X86::CMP32rr:
- case X86::CMP64rr:
- return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg();
- }
- }
-
- return false;
-}
+#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
+#include "X86GenSubtargetInfo.inc"
bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
const MCInst &Inst,
@@ -437,6 +446,64 @@ bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
return Mask.getBoolValue();
}
+static std::vector<std::pair<uint64_t, uint64_t>>
+findX86PltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA) {
+ // Do a lightweight parsing of PLT entries.
+ std::vector<std::pair<uint64_t, uint64_t>> Result;
+ for (uint64_t Byte = 0, End = PltContents.size(); Byte + 6 < End; ) {
+ // Recognize a jmp.
+ if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0xa3) {
+ // The jmp instruction at the beginning of each PLT entry jumps to the
+ // address of the base of the .got.plt section plus the immediate.
+ uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2);
+ Result.push_back(
+ std::make_pair(PltSectionVA + Byte, GotPltSectionVA + Imm));
+ Byte += 6;
+ } else if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0x25) {
+ // The jmp instruction at the beginning of each PLT entry jumps to the
+ // immediate.
+ uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2);
+ Result.push_back(std::make_pair(PltSectionVA + Byte, Imm));
+ Byte += 6;
+ } else
+ Byte++;
+ }
+ return Result;
+}
+
+static std::vector<std::pair<uint64_t, uint64_t>>
+findX86_64PltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents) {
+ // Do a lightweight parsing of PLT entries.
+ std::vector<std::pair<uint64_t, uint64_t>> Result;
+ for (uint64_t Byte = 0, End = PltContents.size(); Byte + 6 < End; ) {
+ // Recognize a jmp.
+ if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0x25) {
+ // The jmp instruction at the beginning of each PLT entry jumps to the
+ // address of the next instruction plus the immediate.
+ uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2);
+ Result.push_back(
+ std::make_pair(PltSectionVA + Byte, PltSectionVA + Byte + 6 + Imm));
+ Byte += 6;
+ } else
+ Byte++;
+ }
+ return Result;
+}
+
+std::vector<std::pair<uint64_t, uint64_t>> X86MCInstrAnalysis::findPltEntries(
+ uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA, const Triple &TargetTriple) const {
+ switch (TargetTriple.getArch()) {
+ case Triple::x86:
+ return findX86PltEntries(PltSectionVA, PltContents, GotPltSectionVA);
+ case Triple::x86_64:
+ return findX86_64PltEntries(PltSectionVA, PltContents);
+ default:
+ return {};
+ }
+}
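The helpers above only pattern-match bytes: a 32-bit PIC PLT entry begins with ff a3 imm32 (jmp *imm32(%ebx)), a 32-bit non-PIC entry with ff 25 imm32, and an x86-64 entry with ff 25 rel32, which is RIP-relative. A minimal standalone sketch of the 64-bit case, using plain standard-library types instead of ArrayRef and the endian helpers (and therefore assuming a little-endian host):

    #include <cstdint>
    #include <cstring>
    #include <utility>
    #include <vector>

    // Decode ff 25 rel32 PLT entries from a raw byte buffer, mirroring
    // findX86_64PltEntries: target = address of the next instruction + rel32.
    static std::vector<std::pair<uint64_t, uint64_t>>
    decodePlt64(uint64_t PltVA, const std::vector<uint8_t> &Bytes) {
      std::vector<std::pair<uint64_t, uint64_t>> Entries;
      for (size_t B = 0, E = Bytes.size(); B + 6 < E;) {
        if (Bytes[B] == 0xff && Bytes[B + 1] == 0x25) {
          uint32_t Rel;
          std::memcpy(&Rel, &Bytes[B + 2], sizeof(Rel)); // little-endian host
          Entries.emplace_back(PltVA + B, PltVA + B + 6 + Rel);
          B += 6;
        } else {
          ++B;
        }
      }
      return Entries;
    }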
+
} // end of namespace X86_MC
} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 595c26d31e3f..4e9f5ba60d2e 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -134,7 +134,7 @@ unsigned getX86SubSuperRegisterOrZero(unsigned, unsigned,
// Defines symbolic names for the X86 instructions.
//
#define GET_INSTRINFO_ENUM
-#define GET_GENINSTRINFO_MC_DECL
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "X86GenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
index 8d38cd32b82c..10a282dd2962 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
@@ -26,6 +26,7 @@ public:
virtual bool emitFPOData(const MCSymbol *ProcSym, SMLoc L = {}) = 0;
virtual bool emitFPOPushReg(unsigned Reg, SMLoc L = {}) = 0;
virtual bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L = {}) = 0;
+ virtual bool emitFPOStackAlign(unsigned Align, SMLoc L = {}) = 0;
virtual bool emitFPOSetFrame(unsigned Reg, SMLoc L = {}) = 0;
};
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index a5e115e5ff4d..2aec695b2dbf 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -79,7 +79,8 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
case FK_SecRel_4:
return COFF::IMAGE_REL_AMD64_SECREL;
default:
- llvm_unreachable("unsupported relocation type");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
+ return COFF::IMAGE_REL_AMD64_ADDR32;
}
} else if (getMachine() == COFF::IMAGE_FILE_MACHINE_I386) {
switch (FixupKind) {
@@ -100,7 +101,8 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
case FK_SecRel_4:
return COFF::IMAGE_REL_I386_SECREL;
default:
- llvm_unreachable("unsupported relocation type");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
+ return COFF::IMAGE_REL_I386_DIR32;
}
} else
llvm_unreachable("Unsupported COFF machine type.");
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
index 093dab4f2f96..bee9b7046338 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
@@ -38,6 +38,7 @@ public:
bool emitFPOData(const MCSymbol *ProcSym, SMLoc L) override;
bool emitFPOPushReg(unsigned Reg, SMLoc L) override;
bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) override;
+ bool emitFPOStackAlign(unsigned Align, SMLoc L) override;
bool emitFPOSetFrame(unsigned Reg, SMLoc L) override;
};
@@ -47,6 +48,7 @@ struct FPOInstruction {
enum Operation {
PushReg,
StackAlloc,
+ StackAlign,
SetFrame,
} Op;
unsigned RegOrOffset;
@@ -90,6 +92,7 @@ public:
bool emitFPOData(const MCSymbol *ProcSym, SMLoc L) override;
bool emitFPOPushReg(unsigned Reg, SMLoc L) override;
bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) override;
+ bool emitFPOStackAlign(unsigned Align, SMLoc L) override;
bool emitFPOSetFrame(unsigned Reg, SMLoc L) override;
};
} // end namespace
@@ -133,6 +136,11 @@ bool X86WinCOFFAsmTargetStreamer::emitFPOStackAlloc(unsigned StackAlloc,
return false;
}
+bool X86WinCOFFAsmTargetStreamer::emitFPOStackAlign(unsigned Align, SMLoc L) {
+ OS << "\t.cv_fpo_stackalign\t" << Align << '\n';
+ return false;
+}
+
bool X86WinCOFFAsmTargetStreamer::emitFPOSetFrame(unsigned Reg, SMLoc L) {
OS << "\t.cv_fpo_setframe\t";
InstPrinter.printRegName(OS, Reg);
@@ -226,6 +234,24 @@ bool X86WinCOFFTargetStreamer::emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) {
return false;
}
+bool X86WinCOFFTargetStreamer::emitFPOStackAlign(unsigned Align, SMLoc L) {
+ if (checkInFPOPrologue(L))
+ return true;
+ if (!llvm::any_of(CurFPOData->Instructions, [](const FPOInstruction &Inst) {
+ return Inst.Op == FPOInstruction::SetFrame;
+ })) {
+ getContext().reportError(
+ L, "a frame register must be established before aligning the stack");
+ return true;
+ }
+ FPOInstruction Inst;
+ Inst.Label = emitFPOLabel();
+ Inst.Op = FPOInstruction::StackAlign;
+ Inst.RegOrOffset = Align;
+ CurFPOData->Instructions.push_back(Inst);
+ return false;
+}
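As the check above enforces, a .cv_fpo_stackalign directive is only accepted after a frame register has been established and before the end of the FPO prologue. A hypothetical caller driving the streamer directly (XTS obtained elsewhere; the surrounding emitFPOProc/emitFPOEndPrologue calls are omitted) would emit the prologue in this order:

    // Sketch of a 16-byte-aligned frame: push EBP, make it the frame register,
    // align, then allocate locals. emitFPOStackAlign would report an error if
    // it were called before emitFPOSetFrame.
    void emitAlignedPrologue(X86TargetStreamer &XTS) {
      XTS.emitFPOPushReg(X86::EBP);   // push ebp
      XTS.emitFPOSetFrame(X86::EBP);  // establish ebp as the frame register
      XTS.emitFPOStackAlign(16);      // and esp, -16
      XTS.emitFPOStackAlloc(32);      // sub esp, 32
    }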
+
bool X86WinCOFFTargetStreamer::emitFPOEndPrologue(SMLoc L) {
if (checkInFPOPrologue(L))
return true;
@@ -250,6 +276,8 @@ struct FPOStateMachine {
unsigned CurOffset = 0;
unsigned LocalSize = 0;
unsigned SavedRegSize = 0;
+ unsigned StackOffsetBeforeAlign = 0;
+ unsigned StackAlign = 0;
unsigned Flags = 0; // FIXME: Set HasSEH / HasEH.
SmallString<128> FrameFunc;
@@ -291,24 +319,39 @@ void FPOStateMachine::emitFrameDataRecord(MCStreamer &OS, MCSymbol *Label) {
FrameFunc.clear();
raw_svector_ostream FuncOS(FrameFunc);
const MCRegisterInfo *MRI = OS.getContext().getRegisterInfo();
+ assert((StackAlign == 0 || FrameReg != 0) &&
+ "cannot align stack without frame reg");
+ StringRef CFAVar = StackAlign == 0 ? "$T0" : "$T1";
+
if (FrameReg) {
// CFA is FrameReg + FrameRegOff.
- FuncOS << "$T0 " << printFPOReg(MRI, FrameReg) << " " << FrameRegOff
+ FuncOS << CFAVar << ' ' << printFPOReg(MRI, FrameReg) << ' ' << FrameRegOff
<< " + = ";
+
+ // Assign $T0, the VFRAME register, the value of ESP after it is aligned.
+ // Starting from the CFA, we subtract the size of all pushed registers, and
+ // align the result. While we don't store any CSRs in this area, $T0 is used
+ // by S_DEFRANGE_FRAMEPOINTER_REL records to find local variables.
+ if (StackAlign) {
+ FuncOS << "$T0 " << CFAVar << ' ' << StackOffsetBeforeAlign << " - "
+ << StackAlign << " @ = ";
+ }
} else {
// The address of return address is ESP + CurOffset, but we use .raSearch to
// match MSVC. This seems to ask the debugger to subtract some combination
// of LocalSize and SavedRegSize from ESP and grovel around in that memory
// to find the address of a plausible return address.
- FuncOS << "$T0 .raSearch = ";
+ FuncOS << CFAVar << " .raSearch = ";
}
// Caller's $eip should be dereferenced CFA, and $esp should be CFA plus 4.
- FuncOS << "$eip $T0 ^ = $esp $T0 4 + = ";
+ FuncOS << "$eip " << CFAVar << " ^ = ";
+ FuncOS << "$esp " << CFAVar << " 4 + = ";
// Each saved register is stored at an unchanging negative CFA offset.
for (RegSaveOffset RO : RegSaveOffsets)
- FuncOS << printFPOReg(MRI, RO.Reg) << " $T0 " << RO.Offset << " - ^ = ";
+ FuncOS << printFPOReg(MRI, RO.Reg) << ' ' << CFAVar << ' ' << RO.Offset
+ << " - ^ = ";
// Add it to the CV string table.
CodeViewContext &CVCtx = OS.getContext().getCVContext();
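As a concrete, purely hypothetical illustration of the program built above: with EBP as the frame register at offset 8, one saved register rendered as $ebx at offset 12, a StackOffsetBeforeAlign of 8, and a 16-byte alignment, FrameFunc would come out roughly as

    $T1 $ebp 8 + = $T0 $T1 8 - 16 @ = $eip $T1 ^ = $esp $T1 4 + = $ebx $T1 12 - ^ =

Here $T1 is the canonical frame address and $T0 is the aligned ESP value that S_DEFRANGE_FRAMEPOINTER_REL records use to locate locals; without .cv_fpo_stackalign both roles collapse into $T0.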
@@ -380,6 +423,10 @@ bool X86WinCOFFTargetStreamer::emitFPOData(const MCSymbol *ProcSym, SMLoc L) {
FSM.FrameReg = Inst.RegOrOffset;
FSM.FrameRegOff = FSM.CurOffset;
break;
+ case FPOInstruction::StackAlign:
+ FSM.StackOffsetBeforeAlign = FSM.CurOffset;
+ FSM.StackAlign = Inst.RegOrOffset;
+ break;
case FPOInstruction::StackAlloc:
FSM.CurOffset += Inst.RegOrOffset;
FSM.LocalSize += Inst.RegOrOffset;
diff --git a/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp b/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp
index 9a39455f9dd5..ab2cebcb58ee 100644
--- a/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp
+++ b/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp
@@ -31,10 +31,6 @@
using namespace llvm;
-namespace llvm {
-void initializeShadowCallStackPass(PassRegistry &);
-}
-
namespace {
class ShadowCallStack : public MachineFunctionPass {
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index fe567f4cece8..bed940d0d0e9 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -304,12 +304,12 @@ void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
}
}
-void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0, e = RawMask.size(); i < e; ++i) {
uint64_t M = RawMask[i];
- if (M == (uint64_t)SM_SentinelUndef) {
- ShuffleMask.push_back(M);
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
// For 256/512-bit vectors the base of the shuffle is the 128-bit
@@ -336,7 +336,7 @@ void DecodeBLENDMask(unsigned NumElts, unsigned Imm,
}
}
-void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");
@@ -354,12 +354,12 @@ void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
// 6 - Most significant bit of source byte replicated in all bit positions.
// 7 - Invert most significant bit of source byte and replicate in all bit positions.
for (int i = 0, e = RawMask.size(); i < e; ++i) {
- uint64_t M = RawMask[i];
- if (M == (uint64_t)SM_SentinelUndef) {
- ShuffleMask.push_back(M);
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
+ uint64_t M = RawMask[i];
uint64_t PermuteOp = (M >> 5) & 0x7;
if (PermuteOp == 4) {
ShuffleMask.push_back(SM_SentinelZero);
@@ -490,7 +490,7 @@ void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
}
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
unsigned VecSize = NumElts * ScalarBits;
unsigned NumLanes = VecSize / 128;
@@ -500,6 +500,10 @@ void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
uint64_t M = RawMask[i];
M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
@@ -508,7 +512,7 @@ void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
}
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
unsigned VecSize = NumElts * ScalarBits;
unsigned NumLanes = VecSize / 128;
@@ -518,6 +522,11 @@ void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
assert((NumElts == RawMask.size()) && "Unexpected mask size");
for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+
// VPERMIL2 Operation.
// Bits[3] - Match Bit.
// Bits[2:1] - (Per Lane) PD Shuffle Mask.
@@ -548,19 +557,29 @@ void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
}
}
-void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
uint64_t EltMaskSize = RawMask.size() - 1;
- for (auto M : RawMask) {
+ for (int i = 0, e = RawMask.size(); i != e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t M = RawMask[i];
M &= EltMaskSize;
ShuffleMask.push_back((int)M);
}
}
-void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
- for (auto M : RawMask) {
+ for (int i = 0, e = RawMask.size(); i != e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t M = RawMask[i];
M &= EltMaskSize;
ShuffleMask.push_back((int)M);
}
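Each of these decoders now receives the undef mask as a separate APInt instead of expecting sentinel values to be pre-mixed into RawMask. A small caller sketch (assumed to be compiled inside the X86 target so the relative include resolves) looks like:

    #include "Utils/X86ShuffleDecode.h"
    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Decode a 16-byte PSHUFB mask in which element 3 came from an undef
    // constant; the decoder emits SM_SentinelUndef there instead of an index.
    static void decodePshufbExample() {
      uint64_t Raw[16] = {0, 1, 2, 3, 4, 5, 6, 7,
                          8, 9, 10, 11, 12, 13, 14, 15};
      APInt UndefElts(16, 0);
      UndefElts.setBit(3);
      SmallVector<int, 16> Mask;
      DecodePSHUFBMask(Raw, UndefElts, Mask); // Mask[3] == SM_SentinelUndef
    }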
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 6d13bd58a127..85cde14a3241 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
#define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
//===----------------------------------------------------------------------===//
@@ -108,7 +109,7 @@ void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
/// Decode a PSHUFB mask from a raw array of constants such as from
/// BUILD_VECTOR.
-void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a BLEND immediate mask into a shuffle mask.
@@ -131,7 +132,7 @@ void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
/// BUILD_VECTOR.
/// This can only decode basic masks (permutes + zeros), not any of the other
/// operations that VPPERM can perform.
-void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a zero extension instruction as a shuffle mask.
@@ -156,20 +157,20 @@ void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
/// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
-void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
-void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index 73bb0f2af285..1c8813815b86 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -75,6 +75,9 @@ FunctionPass *createX86OptimizeLEAs();
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
FunctionPass *createX86FixupSetCC();
+/// Return a pass that folds conditional branch jumps.
+FunctionPass *createX86CondBrFolding();
+
/// Return a pass that avoids creating store forward block issues in the hardware.
FunctionPass *createX86AvoidStoreForwardingBlocks();
@@ -112,8 +115,6 @@ FunctionPass *createX86FixupBWInsts();
/// to another, when profitable.
FunctionPass *createX86DomainReassignmentPass();
-void initializeFixupBWInstPassPass(PassRegistry &);
-
/// This pass replaces the EVEX encoding of AVX-512 instructions with the VEX
/// encoding when possible in order to reduce code size.
FunctionPass *createX86EvexToVexInsts();
@@ -121,14 +122,33 @@ FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
FunctionPass *createX86RetpolineThunksPass();
+/// This pass ensures that instructions featuring a memory operand
+/// have distinct <LineNumber, Discriminator> pairs (with respect to each other).
+FunctionPass *createX86DiscriminateMemOpsPass();
+
+/// This pass applies profiling information to insert cache prefetches.
+FunctionPass *createX86InsertPrefetchPass();
+
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
-void initializeEvexToVexInstPassPass(PassRegistry &);
-
FunctionPass *createX86SpeculativeLoadHardeningPass();
+void initializeEvexToVexInstPassPass(PassRegistry &);
+void initializeFixupBWInstPassPass(PassRegistry &);
+void initializeFixupLEAPassPass(PassRegistry &);
+void initializeShadowCallStackPass(PassRegistry &);
+void initializeWinEHStatePassPass(PassRegistry &);
+void initializeX86AvoidSFBPassPass(PassRegistry &);
+void initializeX86CallFrameOptimizationPass(PassRegistry &);
+void initializeX86CmovConverterPassPass(PassRegistry &);
+void initializeX86CondBrFoldingPassPass(PassRegistry &);
+void initializeX86DomainReassignmentPass(PassRegistry &);
+void initializeX86ExecutionDomainFixPass(PassRegistry &);
+void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 63c2dc4da6cc..6b1749fc7500 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -59,10 +59,7 @@ def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
"Support xsaves instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
- "Enable SSE instructions",
- // SSE codegen depends on cmovs, and all
- // SSE1+ processors support them.
- [FeatureCMOV]>;
+ "Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
@@ -91,17 +88,19 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
[Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
-// without disabling 64-bit mode.
+// without disabling 64-bit mode. Nothing should imply this feature bit. It
+// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
- "Support 64-bit instructions",
- [FeatureCMOV]>;
+ "Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
- "64-bit with cmpxchg16b",
- [Feature64Bit]>;
+ "64-bit with cmpxchg16b">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
"PMULLD instruction is slow">;
+def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
+ "true",
+ "PMADDWD is slower than PMULLD">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
@@ -362,17 +361,30 @@ def FeaturePrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
-// Enable mitigation of some aspects of speculative execution related
-// vulnerabilities by removing speculatable indirect branches. This disables
-// jump-table formation, rewrites explicit `indirectbr` instructions into
-// `switch` instructions, and uses a special construct called a "retpoline" to
-// prevent speculation of the remaining indirect branches (indirect calls and
-// tail calls).
+// Lower indirect calls using a special construct called a `retpoline` to
+// mitigate potential Spectre v2 attacks against them.
+def FeatureRetpolineIndirectCalls
+ : SubtargetFeature<
+ "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
+ "Remove speculation of indirect calls from the generated code.">;
+
+// Lower indirect branches and switches either using conditional branch trees
+// or using a special construct called a `retpoline` to mitigate potential
+// Spectre v2 attacks against them.
+def FeatureRetpolineIndirectBranches
+ : SubtargetFeature<
+ "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
+ "Remove speculation of indirect branches from the generated code.">;
+
+// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
+// `retpoline-indirect-branches` above.
def FeatureRetpoline
- : SubtargetFeature<"retpoline", "UseRetpoline", "true",
+ : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
"Remove speculation of indirect branches from the "
"generated code, either by avoiding them entirely or "
- "lowering them with a speculation blocking construct.">;
+ "lowering them with a speculation blocking construct.",
+ [FeatureRetpolineIndirectCalls,
+ FeatureRetpolineIndirectBranches]>;
// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
@@ -380,8 +392,10 @@ def FeatureRetpoline
def FeatureRetpolineExternalThunk
: SubtargetFeature<
"retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
- "Enable retpoline, but with an externally provided thunk.",
- [FeatureRetpoline]>;
+ "When lowering an indirect call or branch using a `retpoline`, rely "
+ "on the specified user provided thunk rather than emitting one "
+ "ourselves. Only has effect when combined with some other retpoline "
+ "feature.", [FeatureRetpolineIndirectCalls]>;
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
@@ -389,6 +403,25 @@ def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
"Support movdir64b instruction">;
+def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
+ "Indicates that the BEXTR instruction is implemented as a single uop "
+ "with good throughput.">;
+
+// Combine vector math operations with shuffles into horizontal math
+// instructions if a CPU implements horizontal operations (introduced with
+// SSE3) with better latency/throughput than the alternative sequence.
+def FeatureFastHorizontalOps
+ : SubtargetFeature<
+ "fast-hops", "HasFastHorizontalOps", "true",
+ "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
+ "normal vector instructions with shuffles", [FeatureSSE3]>;
+
+// Merge branches using three-way conditional code.
+def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
+ "ThreewayBranchProfitable", "true",
+ "Merge branches to a three-way "
+ "conditional branch">;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -416,6 +449,7 @@ include "X86SchedHaswell.td"
include "X86SchedBroadwell.td"
include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
+include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
@@ -430,22 +464,6 @@ def ProcIntelGLP : SubtargetFeature<"glp", "X86ProcFamily", "IntelGLP",
"Intel Goldmont Plus processors">;
def ProcIntelTRM : SubtargetFeature<"tremont", "X86ProcFamily", "IntelTRM",
"Intel Tremont processors">;
-def ProcIntelHSW : SubtargetFeature<"haswell", "X86ProcFamily",
- "IntelHaswell", "Intel Haswell processors">;
-def ProcIntelBDW : SubtargetFeature<"broadwell", "X86ProcFamily",
- "IntelBroadwell", "Intel Broadwell processors">;
-def ProcIntelSKL : SubtargetFeature<"skylake", "X86ProcFamily",
- "IntelSkylake", "Intel Skylake processors">;
-def ProcIntelKNL : SubtargetFeature<"knl", "X86ProcFamily",
- "IntelKNL", "Intel Knights Landing processors">;
-def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily",
- "IntelSKX", "Intel Skylake Server processors">;
-def ProcIntelCNL : SubtargetFeature<"cannonlake", "X86ProcFamily",
- "IntelCannonlake", "Intel Cannonlake processors">;
-def ProcIntelICL : SubtargetFeature<"icelake-client", "X86ProcFamily",
- "IntelIcelakeClient", "Intel Icelake processors">;
-def ProcIntelICX : SubtargetFeature<"icelake-server", "X86ProcFamily",
- "IntelIcelakeServer", "Intel Icelake Server processors">;
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
@@ -466,7 +484,7 @@ def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
foreach P = ["pentium3", "pentium3m"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
}
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -481,12 +499,12 @@ foreach P = ["pentium3", "pentium3m"] in {
def : ProcessorModel<"pentium-m", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureNOPL]>;
+ FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
foreach P = ["pentium4", "pentium4m"] in {
def : ProcessorModel<P, GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureNOPL]>;
+ FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
}
// Intel Quark.
@@ -495,19 +513,21 @@ def : Proc<"lakemont", []>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
// NetBurst.
def : ProcessorModel<"prescott", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
def : ProcessorModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE3,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B
]>;
@@ -515,10 +535,12 @@ def : ProcessorModel<"nocona", GenericPostRAModel, [
def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureLAHFSAHF,
FeatureMacroFusion
@@ -526,10 +548,12 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE41,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureLAHFSAHF,
FeatureMacroFusion
@@ -540,10 +564,12 @@ class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
ProcIntelAtom,
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureMOVBE,
FeatureLEAForSP,
@@ -560,15 +586,16 @@ def : BonnellProc<"atom">; // Pin the generic name to the baseline.
class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
ProcIntelSLM,
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureMOVBE,
FeaturePOPCNT,
FeaturePCLMUL,
- FeatureAES,
FeatureSlowDivide64,
FeatureSlowTwoMemOps,
FeaturePRFCHW,
@@ -594,10 +621,12 @@ class ProcModel<string Name, SchedMachineModel Model,
def GLMFeatures : ProcessorFeatures<[], [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureMOVBE,
FeaturePOPCNT,
@@ -653,10 +682,12 @@ def : TremontProc<"tremont">;
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePOPCNT,
FeatureLAHFSAHF,
@@ -669,13 +700,14 @@ def : NehalemProc<"corei7">;
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePOPCNT,
- FeatureAES,
FeaturePCLMUL,
FeatureLAHFSAHF,
FeatureMacroFusion
@@ -686,13 +718,14 @@ def : WestmereProc<"westmere">;
// rather than a superset.
def SNBFeatures : ProcessorFeatures<[], [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePOPCNT,
- FeatureAES,
FeatureSlowDivide64,
FeaturePCLMUL,
FeatureXSAVE,
@@ -702,6 +735,7 @@ def SNBFeatures : ProcessorFeatures<[], [
FeatureFastScalarFSQRT,
FeatureFastSHLDRotate,
FeatureSlowIncDec,
+ FeatureMergeToThreeWayBranch,
FeatureMacroFusion
]>;
@@ -741,7 +775,6 @@ def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
HSWFeatures.Value, [
- ProcIntelHSW,
FeaturePOPCNTFalseDeps,
FeatureLZCNTFalseDeps
]>;
@@ -755,15 +788,14 @@ def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
]>;
class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,
BDWFeatures.Value, [
- ProcIntelBDW,
FeaturePOPCNTFalseDeps,
FeatureLZCNTFalseDeps
]>;
def : BroadwellProc<"broadwell">;
def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
+ FeatureAES,
FeatureMPX,
- FeatureRTM,
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT,
@@ -772,14 +804,32 @@ def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
SKLFeatures.Value, [
- ProcIntelSKL,
FeatureHasFastGather,
FeaturePOPCNTFalseDeps,
FeatureSGX
]>;
def : SkylakeClientProc<"skylake">;
-def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [
+def KNLFeatures : ProcessorFeatures<[], [
+ FeatureX87,
+ FeatureCMOV,
+ FeatureMMX,
+ FeatureFXSR,
+ FeatureNOPL,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeaturePOPCNT,
+ FeatureSlowDivide64,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureLAHFSAHF,
+ FeatureSlow3OpsLEA,
+ FeatureSlowIncDec,
+ FeatureAES,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
FeatureAVX512,
FeatureERI,
FeatureCDI,
@@ -798,19 +848,19 @@ def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [
// FIXME: define KNL model
class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
KNLFeatures.Value, [
- ProcIntelKNL,
FeatureSlowTwoMemOps,
FeatureFastPartialYMMorZMMWrite,
- FeatureHasFastGather
+ FeatureHasFastGather,
+ FeatureSlowPMADDWD
]>;
def : KnightsLandingProc<"knl">;
class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
KNLFeatures.Value, [
- ProcIntelKNL,
FeatureSlowTwoMemOps,
FeatureFastPartialYMMorZMMWrite,
FeatureHasFastGather,
+ FeatureSlowPMADDWD,
FeatureVPOPCNTDQ
]>;
def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
@@ -827,13 +877,23 @@ def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
SKXFeatures.Value, [
- ProcIntelSKX,
FeatureHasFastGather,
FeaturePOPCNTFalseDeps
]>;
def : SkylakeServerProc<"skylake-avx512">;
def : SkylakeServerProc<"skx">; // Legacy alias.
+def CLXFeatures : ProcessorFeatures<SKXFeatures.Value, [
+ FeatureVNNI
+]>;
+
+class CascadelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
+ CLXFeatures.Value, [
+ FeatureHasFastGather,
+ FeaturePOPCNTFalseDeps
+]>;
+def : CascadelakeProc<"cascadelake">;
+
def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
FeatureAVX512,
FeatureCDI,
@@ -849,7 +909,6 @@ def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
CNLFeatures.Value, [
- ProcIntelCNL,
FeatureHasFastGather
]>;
def : CannonlakeProc<"cannonlake">;
@@ -868,14 +927,12 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
class IcelakeClientProc<string Name> : ProcModel<Name, SkylakeServerModel,
ICLFeatures.Value, [
- ProcIntelICL,
FeatureHasFastGather
]>;
def : IcelakeClientProc<"icelake-client">;
class IcelakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
ICLFeatures.Value, [
- ProcIntelICX,
FeaturePCONFIG,
FeatureWBNOINVD,
FeatureHasFastGather
@@ -889,39 +946,43 @@ def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
foreach P = ["athlon", "athlon-tbird"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, Feature3DNowA,
FeatureNOPL, FeatureSlowSHLD]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD]>;
+ FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
+ FeatureCMOV]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
+ FeatureCMOV, Feature64Bit]>;
}
foreach P = ["amdfam10", "barcelona"] in {
def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD, FeatureLAHFSAHF]>;
+ FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>;
}
// Bobcat
def : Proc<"btver1", [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureSSE4A,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePRFCHW,
FeatureLZCNT,
@@ -934,11 +995,13 @@ def : Proc<"btver1", [
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX,
FeatureFXSR,
FeatureNOPL,
FeatureSSE4A,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePRFCHW,
FeatureAES,
@@ -954,14 +1017,18 @@ def : ProcessorModel<"btver2", BtVer2Model, [
FeatureSlowSHLD,
FeatureLAHFSAHF,
FeatureFast15ByteNOP,
- FeatureFastPartialYMMorZMMWrite
+ FeatureFastBEXTR,
+ FeatureFastPartialYMMorZMMWrite,
+ FeatureFastHorizontalOps
]>;
// Bulldozer
-def : Proc<"bdver1", [
+def : ProcessorModel<"bdver1", BdVer2Model, [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -981,10 +1048,12 @@ def : Proc<"bdver1", [
FeatureMacroFusion
]>;
// Piledriver
-def : Proc<"bdver2", [
+def : ProcessorModel<"bdver2", BdVer2Model, [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -1005,14 +1074,17 @@ def : Proc<"bdver2", [
FeatureSlowSHLD,
FeatureLAHFSAHF,
FeatureFast11ByteNOP,
+ FeatureFastBEXTR,
FeatureMacroFusion
]>;
// Steamroller
def : Proc<"bdver3", [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -1035,18 +1107,21 @@ def : Proc<"bdver3", [
FeatureFSGSBase,
FeatureLAHFSAHF,
FeatureFast11ByteNOP,
+ FeatureFastBEXTR,
FeatureMacroFusion
]>;
// Excavator
def : Proc<"bdver4", [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX2,
FeatureFXSR,
FeatureNOPL,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -1064,6 +1139,7 @@ def : Proc<"bdver4", [
FeatureSlowSHLD,
FeatureFSGSBase,
FeatureLAHFSAHF,
+ FeatureFastBEXTR,
FeatureFast11ByteNOP,
FeatureMWAITX,
FeatureMacroFusion
@@ -1078,6 +1154,8 @@ def: ProcessorModel<"znver1", Znver1Model, [
FeatureBMI2,
FeatureCLFLUSHOPT,
FeatureCLZERO,
+ FeatureCMOV,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureF16C,
FeatureFMA,
@@ -1087,6 +1165,7 @@ def: ProcessorModel<"znver1", Znver1Model, [
FeatureFastLZCNT,
FeatureLAHFSAHF,
FeatureLZCNT,
+ FeatureFastBEXTR,
FeatureFast15ByteNOP,
FeatureMacroFusion,
FeatureMMX,
@@ -1112,7 +1191,7 @@ def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE1, FeatureFXSR]>;
+ FeatureSSE1, FeatureFXSR, FeatureCMOV]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -1126,6 +1205,7 @@ def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
// forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE2,
FeatureFXSR,
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 7d8f7b9dfe46..36cef98a1ef5 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -88,19 +88,19 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
void X86AsmPrinter::EmitFunctionBodyStart() {
if (EmitFPOData) {
- X86TargetStreamer *XTS =
- static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
- unsigned ParamsSize =
- MF->getInfo<X86MachineFunctionInfo>()->getArgumentStackSize();
- XTS->emitFPOProc(CurrentFnSym, ParamsSize);
+ if (auto *XTS =
+ static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()))
+ XTS->emitFPOProc(
+ CurrentFnSym,
+ MF->getInfo<X86MachineFunctionInfo>()->getArgumentStackSize());
}
}
void X86AsmPrinter::EmitFunctionBodyEnd() {
if (EmitFPOData) {
- X86TargetStreamer *XTS =
- static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
- XTS->emitFPOEndProc();
+ if (auto *XTS =
+ static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()))
+ XTS->emitFPOEndProc();
}
}
@@ -129,6 +129,9 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
GVSym =
P.OutContext.getOrCreateSymbol(Twine("__imp_") + GVSym->getName());
+ else if (MO.getTargetFlags() == X86II::MO_COFFSTUB)
+ GVSym =
+ P.OutContext.getOrCreateSymbol(Twine(".refptr.") + GVSym->getName());
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
@@ -161,6 +164,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
break;
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
+ case X86II::MO_COFFSTUB:
// These affect the name of the symbol, not any suffix.
break;
case X86II::MO_GOT_ABSOLUTE_ADDRESS:
@@ -568,9 +572,9 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
// Emitting an Elf_Prop for the CET properties.
OutStreamer->EmitIntValue(ELF::GNU_PROPERTY_X86_FEATURE_1_AND, 4);
- OutStreamer->EmitIntValue(WordSize, 4); // data size
- OutStreamer->EmitIntValue(FeatureFlagsAnd, WordSize); // data
- EmitAlignment(WordSize == 4 ? 2 : 3); // padding
+ OutStreamer->EmitIntValue(4, 4); // data size
+ OutStreamer->EmitIntValue(FeatureFlagsAnd, 4); // data
+ EmitAlignment(WordSize == 4 ? 2 : 3); // padding
OutStreamer->endSection(Nt);
OutStreamer->SwitchSection(Cur);
@@ -583,21 +587,28 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
if (TT.isOSBinFormatCOFF()) {
// Emit an absolute @feat.00 symbol. This appears to be some kind of
// compiler features bitfield read by link.exe.
+ MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00"));
+ OutStreamer->BeginCOFFSymbolDef(S);
+ OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
+ OutStreamer->EndCOFFSymbolDef();
+ int64_t Feat00Flags = 0;
+
if (TT.getArch() == Triple::x86) {
- MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00"));
- OutStreamer->BeginCOFFSymbolDef(S);
- OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
- OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
- OutStreamer->EndCOFFSymbolDef();
// According to the PE-COFF spec, the LSB of this value marks the object
// for "registered SEH". This means that all SEH handler entry points
// must be registered in .sxdata. Use of any unregistered handlers will
// cause the process to terminate immediately. LLVM does not know how to
// register any SEH handlers, so its object files should be safe.
- OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
- OutStreamer->EmitAssignment(
- S, MCConstantExpr::create(int64_t(1), MMI->getContext()));
+ Feat00Flags |= 1;
}
+
+ if (M.getModuleFlag("cfguardtable"))
+ Feat00Flags |= 0x800; // Object is CFG-aware.
+
+ OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
+ OutStreamer->EmitAssignment(
+ S, MCConstantExpr::create(Feat00Flags, MMI->getContext()));
}
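The resulting @feat.00 values follow directly from the code above (a quick worked example):

    x86 object, module has "cfguardtable":    Feat00Flags = 1 | 0x800 = 0x801
    x86 object, no "cfguardtable":            Feat00Flags = 1
    x86-64 object, module has "cfguardtable": Feat00Flags = 0x800
    x86-64 object, no "cfguardtable":         Feat00Flags = 0 (symbol still emitted)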
OutStreamer->EmitSyntaxDirective();
@@ -663,7 +674,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
emitNonLazyStubs(MMI, *OutStreamer);
// Emit stack and fault map information.
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
FM.serializeToFaultMapSection();
// This flag tells the linker that no global symbols contain code that fall
@@ -684,12 +695,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
if (TT.isOSBinFormatCOFF()) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
return;
}
if (TT.isOSBinFormatELF()) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
FM.serializeToFaultMapSection();
return;
}
diff --git a/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index ab2cbfc33e17..627a6cb14514 100644
--- a/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -52,10 +52,6 @@ using namespace llvm;
#define DEBUG_TYPE "x86-avoid-SFB"
-namespace llvm {
-void initializeX86AvoidSFBPassPass(PassRegistry &);
-} // end namespace llvm
-
static cl::opt<bool> DisableX86AvoidStoreForwardBlocks(
"x86-disable-avoid-SFB", cl::Hidden,
cl::desc("X86: Disable Store Forwarding Blocks fixup."), cl::init(false));
@@ -590,7 +586,7 @@ void X86AvoidSFBPass::breakBlockedCopies(
StDisp2 += OverlapDelta;
Size2 -= OverlapDelta;
}
- Size1 = std::abs(std::abs(LdDisp2) - std::abs(LdDisp1));
+ Size1 = LdDisp2 - LdDisp1;
// Build a copy for the point until the current blocking store's
// displacement.
@@ -645,21 +641,22 @@ removeRedundantBlockingStores(DisplacementSizeMap &BlockingStoresDispSizeMap) {
if (BlockingStoresDispSizeMap.size() <= 1)
return;
- int64_t PrevDisp = BlockingStoresDispSizeMap.begin()->first;
- unsigned PrevSize = BlockingStoresDispSizeMap.begin()->second;
- SmallVector<int64_t, 2> ForRemoval;
- for (auto DispSizePair = std::next(BlockingStoresDispSizeMap.begin());
- DispSizePair != BlockingStoresDispSizeMap.end(); ++DispSizePair) {
- int64_t CurrDisp = DispSizePair->first;
- unsigned CurrSize = DispSizePair->second;
- if (CurrDisp + CurrSize <= PrevDisp + PrevSize) {
- ForRemoval.push_back(PrevDisp);
+ SmallVector<std::pair<int64_t, unsigned>, 0> DispSizeStack;
+ for (auto DispSizePair : BlockingStoresDispSizeMap) {
+ int64_t CurrDisp = DispSizePair.first;
+ unsigned CurrSize = DispSizePair.second;
+ while (DispSizeStack.size()) {
+ int64_t PrevDisp = DispSizeStack.back().first;
+ unsigned PrevSize = DispSizeStack.back().second;
+ if (CurrDisp + CurrSize > PrevDisp + PrevSize)
+ break;
+ DispSizeStack.pop_back();
}
- PrevDisp = CurrDisp;
- PrevSize = CurrSize;
+ DispSizeStack.push_back(DispSizePair);
}
- for (auto Disp : ForRemoval)
- BlockingStoresDispSizeMap.erase(Disp);
+ BlockingStoresDispSizeMap.clear();
+ for (auto Disp : DispSizeStack)
+ BlockingStoresDispSizeMap.insert(Disp);
}
bool X86AvoidSFBPass::runOnMachineFunction(MachineFunction &MF) {
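The replacement loop above is a monotonic-stack pass over the displacement-sorted map: before keeping the current store, it pops every earlier store whose interval end already reaches at least as far as the current one's. The same logic, written as a standalone helper over plain standard containers (a sketch, not code from the pass):

    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    // Walk displacements in ascending order; drop earlier entries whose end
    // (Disp + Size) is not exceeded by the current entry's end.
    static void pruneBlockingStores(std::map<int64_t, unsigned> &DispSize) {
      std::vector<std::pair<int64_t, unsigned>> Stack;
      for (const auto &DS : DispSize) {
        while (!Stack.empty() &&
               DS.first + DS.second <= Stack.back().first + Stack.back().second)
          Stack.pop_back();
        Stack.push_back(DS);
      }
      DispSize.clear();
      DispSize.insert(Stack.begin(), Stack.end());
    }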
diff --git a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index dea95f56f4d5..903d24c9984a 100644
--- a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -56,10 +56,6 @@ static cl::opt<bool>
cl::desc("Avoid optimizing x86 call frames for size"),
cl::init(false), cl::Hidden);
-namespace llvm {
-void initializeX86CallFrameOptimizationPass(PassRegistry &);
-}
-
namespace {
class X86CallFrameOptimization : public MachineFunctionPass {
diff --git a/contrib/llvm/lib/Target/X86/X86CallLowering.cpp b/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
index 96ea64dc8c48..1dc83b76595d 100644
--- a/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -65,10 +65,8 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
- if (SplitVTs.size() != 1) {
- // TODO: support struct/array split
- return false;
- }
+ if (OrigArg.Ty->isVoidTy())
+ return true;
EVT VT = SplitVTs[0];
unsigned NumParts = TLI.getNumRegisters(Context, VT);
@@ -185,27 +183,36 @@ protected:
} // end anonymous namespace
-bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
- assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
-
+bool X86CallLowering::lowerReturn(
+ MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
+ assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
+ "Return value without a vreg");
auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
- if (VReg) {
+ if (!VRegs.empty()) {
MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
auto &DL = MF.getDataLayout();
- const Function &F = MF.getFunction();
+ LLVMContext &Ctx = Val->getType()->getContext();
+ const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
- ArgInfo OrigArg{VReg, Val->getType()};
- setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> SplitArgs;
- if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs) {
- MIRBuilder.buildUnmerge(Regs, VReg);
- }))
- return false;
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+ if (!splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI,
+ [&](ArrayRef<unsigned> Regs) {
+ MIRBuilder.buildUnmerge(Regs, VRegs[i]);
+ }))
+ return false;
+ }
OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
diff --git a/contrib/llvm/lib/Target/X86/X86CallLowering.h b/contrib/llvm/lib/Target/X86/X86CallLowering.h
index 6c9dc1565dad..f5f8f9a3ef6d 100644
--- a/contrib/llvm/lib/Target/X86/X86CallLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86CallLowering.h
@@ -29,8 +29,8 @@ class X86CallLowering : public CallLowering {
public:
X86CallLowering(const X86TargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
index fcc9a296de93..fe49c9ffbd95 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -590,9 +590,11 @@ def CC_X86_64_HHVM_C : CallingConv<[
// Calling convention used on Win64
def CC_X86_Win64_C : CallingConv<[
- // FIXME: Handle byval stuff.
// FIXME: Handle varargs.
+ // Byval aggregates are passed by pointer
+ CCIfByVal<CCPassIndirect<i64>>,
+
// Promote i1/v1i1 arguments to i8.
CCIfType<[i1, v1i1], CCPromoteToType<i8>>,
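The new CCIfByVal<CCPassIndirect<i64>> rule makes the Win64 convention pass byval aggregates indirectly: the caller materializes a copy and hands the callee its address as an i64. A C++-level illustration of the kind of parameter affected (illustrative only, not code from this patch):

    // A large aggregate passed by value. Under CC_X86_Win64_C a pointer to a
    // caller-made copy is what actually travels in the argument slot.
    struct Payload { char Bytes[64]; };
    int consume(Payload P);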
diff --git a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
index 1c5f110d8c60..c3e76fd2a856 100644
--- a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -81,12 +81,6 @@ STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");
-namespace llvm {
-
-void initializeX86CmovConverterPassPass(PassRegistry &);
-
-} // end namespace llvm
-
// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
EnableCmovConverter("x86-cmov-converter",
diff --git a/contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp b/contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp
new file mode 100644
index 000000000000..7ce443c4656a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp
@@ -0,0 +1,585 @@
+//===---- X86CondBrFolding.cpp - optimize conditional branches ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file defines a pass that optimizes conditional branches on x86 by
+// taking advantage of the three-way conditional code generated by compare
+// instructions.
+// Currently, it tries to hoist an EQ or NE conditional branch to a dominant
+// conditional branch where the same EQ/NE condition code is computed. An
+// example:
+// bb_0:
+// cmp %0, 19
+// jg bb_1
+// jmp bb_2
+// bb_1:
+// cmp %0, 40
+// jg bb_3
+// jmp bb_4
+// bb_4:
+// cmp %0, 20
+// je bb_5
+// jmp bb_6
+// Here we could combine the two compares in bb_0 and bb_4 and have the
+// following code:
+// bb_0:
+// cmp %0, 20
+// jg bb_1
+// jl bb_2
+// jmp bb_5
+// bb_1:
+// cmp %0, 40
+// jg bb_3
+// jmp bb_6
+// For the case of %0 == 20 (bb_5), we eliminate two jumps, and the control
+// height for bb_6 is also reduced. bb_4 is gone after the optimization.
+//
+// This code pattern is common, especially in switch-case lowering, where we
+// generate a compare against "pivot-1" for the inner nodes of the binary
+// search tree.
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-condbr-folding"
+
+STATISTIC(NumFixedCondBrs, "Number of x86 condbr folded");
+
+namespace {
+class X86CondBrFoldingPass : public MachineFunctionPass {
+public:
+ X86CondBrFoldingPass() : MachineFunctionPass(ID) {
+ initializeX86CondBrFoldingPassPass(*PassRegistry::getPassRegistry());
+ }
+ StringRef getPassName() const override { return "X86 CondBr Folding"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ }
+
+public:
+ static char ID;
+};
+} // namespace
+
+char X86CondBrFoldingPass::ID = 0;
+INITIALIZE_PASS(X86CondBrFoldingPass, "X86CondBrFolding", "X86CondBrFolding", false, false)
+
+FunctionPass *llvm::createX86CondBrFolding() {
+ return new X86CondBrFoldingPass();
+}
+
+namespace {
+// A class that stores the auxiliary information for each MBB.
+struct TargetMBBInfo {
+ MachineBasicBlock *TBB;
+ MachineBasicBlock *FBB;
+ MachineInstr *BrInstr;
+ MachineInstr *CmpInstr;
+ X86::CondCode BranchCode;
+ unsigned SrcReg;
+ int CmpValue;
+ bool Modified;
+ bool CmpBrOnly;
+};
+
+// A class that optimizes conditional branches by hoisting and merging condition codes.
+class X86CondBrFolding {
+public:
+ X86CondBrFolding(const X86InstrInfo *TII,
+ const MachineBranchProbabilityInfo *MBPI,
+ MachineFunction &MF)
+ : TII(TII), MBPI(MBPI), MF(MF) {}
+ bool optimize();
+
+private:
+ const X86InstrInfo *TII;
+ const MachineBranchProbabilityInfo *MBPI;
+ MachineFunction &MF;
+ std::vector<std::unique_ptr<TargetMBBInfo>> MBBInfos;
+ SmallVector<MachineBasicBlock *, 4> RemoveList;
+
+ void optimizeCondBr(MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineBasicBlock *> &BranchPath);
+ void fixBranchProb(MachineBasicBlock *NextMBB, MachineBasicBlock *RootMBB,
+ SmallVectorImpl<MachineBasicBlock *> &BranchPath);
+ void replaceBrDest(MachineBasicBlock *MBB, MachineBasicBlock *OrigDest,
+ MachineBasicBlock *NewDest);
+ void fixupModifiedCond(MachineBasicBlock *MBB);
+ std::unique_ptr<TargetMBBInfo> analyzeMBB(MachineBasicBlock &MBB);
+ static bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+ int &CmpValue);
+ bool findPath(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &BranchPath);
+ TargetMBBInfo *getMBBInfo(MachineBasicBlock *MBB) const {
+ return MBBInfos[MBB->getNumber()].get();
+ }
+};
+} // namespace
+
+// Find a valid path on which we can reuse the condition code.
+// The resulting path (if the function returns true) is stored in BranchPath.
+// Return value:
+// false: no valid path is found.
+// true: a valid path is found and the target BB can be reached.
+bool X86CondBrFolding::findPath(
+ MachineBasicBlock *MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
+ assert(MBBInfo && "Expecting a candidate MBB");
+ int CmpValue = MBBInfo->CmpValue;
+
+ MachineBasicBlock *PredMBB = *MBB->pred_begin();
+ MachineBasicBlock *SaveMBB = MBB;
+ while (PredMBB) {
+ TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
+ if (!PredMBBInfo || PredMBBInfo->SrcReg != MBBInfo->SrcReg)
+ return false;
+
+ assert(SaveMBB == PredMBBInfo->TBB || SaveMBB == PredMBBInfo->FBB);
+ bool IsFalseBranch = (SaveMBB == PredMBBInfo->FBB);
+
+ X86::CondCode CC = PredMBBInfo->BranchCode;
+ assert(CC == X86::COND_L || CC == X86::COND_G || CC == X86::COND_E);
+ int PredCmpValue = PredMBBInfo->CmpValue;
+ bool ValueCmpTrue = ((CmpValue < PredCmpValue && CC == X86::COND_L) ||
+ (CmpValue > PredCmpValue && CC == X86::COND_G) ||
+ (CmpValue == PredCmpValue && CC == X86::COND_E));
+ // Check if both the result of value compare and the branch target match.
+ if (!(ValueCmpTrue ^ IsFalseBranch)) {
+ LLVM_DEBUG(dbgs() << "Dead BB detected!\n");
+ return false;
+ }
+
+ BranchPath.push_back(PredMBB);
+ // These are the conditions on which we could combine the compares.
+ if ((CmpValue == PredCmpValue) ||
+ (CmpValue == PredCmpValue - 1 && CC == X86::COND_L) ||
+ (CmpValue == PredCmpValue + 1 && CC == X86::COND_G))
+ return true;
+
+ // If PredMBB has more than one predecessor, or is not a pure cmp-and-br
+ // block, bail out.
+ if (PredMBB->pred_size() != 1 || !PredMBBInfo->CmpBrOnly)
+ return false;
+
+ SaveMBB = PredMBB;
+ PredMBB = *PredMBB->pred_begin();
+ }
+ return false;
+}
+
+// Fix up any PHI node in the successor of MBB.
+static void fixPHIsInSucc(MachineBasicBlock *MBB, MachineBasicBlock *OldMBB,
+ MachineBasicBlock *NewMBB) {
+ if (NewMBB == OldMBB)
+ return;
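+ // PHI operands come in (value, block) pairs after the def, so the block
+ // operands sit at every even index starting from 2.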
+ for (auto MI = MBB->instr_begin(), ME = MBB->instr_end();
+ MI != ME && MI->isPHI(); ++MI)
+ for (unsigned i = 2, e = MI->getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.getMBB() == OldMBB)
+ MO.setMBB(NewMBB);
+ }
+}
+
+// Utility function to set branch probability for edge MBB->SuccMBB.
+static inline bool setBranchProb(MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccMBB,
+ BranchProbability Prob) {
+ auto MBBI = std::find(MBB->succ_begin(), MBB->succ_end(), SuccMBB);
+ if (MBBI == MBB->succ_end())
+ return false;
+ MBB->setSuccProbability(MBBI, Prob);
+ return true;
+}
+
+// Utility function to find the unconditional br instruction in MBB.
+static inline MachineBasicBlock::iterator
+findUncondBrI(MachineBasicBlock *MBB) {
+ return std::find_if(MBB->begin(), MBB->end(), [](MachineInstr &MI) -> bool {
+ return MI.getOpcode() == X86::JMP_1;
+ });
+}
+
+// Replace MBB's original successor, OrigDest, with NewDest.
+// Also update the MBBInfo for MBB.
+void X86CondBrFolding::replaceBrDest(MachineBasicBlock *MBB,
+ MachineBasicBlock *OrigDest,
+ MachineBasicBlock *NewDest) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
+ MachineInstr *BrMI;
+ if (MBBInfo->TBB == OrigDest) {
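+ // Rebuild the conditional branch with the same condition code but pointing
+ // at NewDest; the old branch is erased below.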
+ BrMI = MBBInfo->BrInstr;
+ unsigned JNCC = GetCondBranchFromCond(MBBInfo->BranchCode);
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(JNCC))
+ .addMBB(NewDest);
+ MBBInfo->TBB = NewDest;
+ MBBInfo->BrInstr = MIB.getInstr();
+ } else { // Should be the unconditional jump stmt.
+ MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
+ BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
+ .addMBB(NewDest);
+ MBBInfo->FBB = NewDest;
+ BrMI = &*UncondBrI;
+ }
+ fixPHIsInSucc(NewDest, OrigDest, MBB);
+ BrMI->eraseFromParent();
+ MBB->addSuccessor(NewDest);
+ setBranchProb(MBB, NewDest, MBPI->getEdgeProbability(MBB, OrigDest));
+ MBB->removeSuccessor(OrigDest);
+}
+
+// Change the CondCode and BrInstr according to MBBInfo.
+void X86CondBrFolding::fixupModifiedCond(MachineBasicBlock *MBB) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
+ if (!MBBInfo->Modified)
+ return;
+
+ MachineInstr *BrMI = MBBInfo->BrInstr;
+ X86::CondCode CC = MBBInfo->BranchCode;
+ MachineInstrBuilder MIB = BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI),
+ TII->get(GetCondBranchFromCond(CC)))
+ .addMBB(MBBInfo->TBB);
+ BrMI->eraseFromParent();
+ MBBInfo->BrInstr = MIB.getInstr();
+
+ MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
+ BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
+ .addMBB(MBBInfo->FBB);
+ MBB->erase(UncondBrI);
+ MBBInfo->Modified = false;
+}
+
+//
+// Apply the transformation:
+// RootMBB -1-> ... PredMBB -3-> MBB -5-> TargetMBB
+// \-2-> \-4-> \-6-> FalseMBB
+// ==>
+// RootMBB -1-> ... PredMBB -7-> FalseMBB
+// TargetMBB <-8-/ \-2-> \-4->
+//
+// Note that PredMBB and RootMBB could be the same.
+// If TargetMBB is dead, we will not have TargetMBB and edge 8.
+//
+// There is special handling when RootMBB's condition is COND_E, in which case
+// we directly short-cycle the branch instruction.
+//
+void X86CondBrFolding::optimizeCondBr(
+ MachineBasicBlock &MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
+
+ X86::CondCode CC;
+ TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
+ assert(MBBInfo && "Expecting a candidate MBB");
+ MachineBasicBlock *TargetMBB = MBBInfo->TBB;
+ BranchProbability TargetProb = MBPI->getEdgeProbability(&MBB, MBBInfo->TBB);
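+ // TargetProb starts as the probability of the MBB->TargetMBB edge and is
+ // scaled along the path by the fixBranchProb lambda below.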
+
+ // Forward the jump from MBB's predecessor to MBB's false target.
+ MachineBasicBlock *PredMBB = BranchPath.front();
+ TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
+ assert(PredMBBInfo && "Expecting a candidate MBB");
+ if (PredMBBInfo->Modified)
+ fixupModifiedCond(PredMBB);
+ CC = PredMBBInfo->BranchCode;
+ // Don't do this if the depth of BranchPath is 1 and PredMBB's condition is
+ // COND_E; we will short-cycle directly in that case.
+ if (!(CC == X86::COND_E && BranchPath.size() == 1))
+ replaceBrDest(PredMBB, &MBB, MBBInfo->FBB);
+
+ MachineBasicBlock *RootMBB = BranchPath.back();
+ TargetMBBInfo *RootMBBInfo = getMBBInfo(RootMBB);
+ assert(RootMBBInfo && "Expecting a candidate MBB");
+ if (RootMBBInfo->Modified)
+ fixupModifiedCond(RootMBB);
+ CC = RootMBBInfo->BranchCode;
+
+ if (CC != X86::COND_E) {
+ MachineBasicBlock::iterator UncondBrI = findUncondBrI(RootMBB);
+ // RootMBB: Cond jump to the original not-taken MBB.
+ X86::CondCode NewCC;
+ switch (CC) {
+ case X86::COND_L:
+ NewCC = X86::COND_G;
+ break;
+ case X86::COND_G:
+ NewCC = X86::COND_L;
+ break;
+ default:
+ llvm_unreachable("unexpected condtional code.");
+ }
+ BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
+ TII->get(GetCondBranchFromCond(NewCC)))
+ .addMBB(RootMBBInfo->FBB);
+
+ // RootMBB: Jump to TargetMBB
+ BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
+ TII->get(X86::JMP_1))
+ .addMBB(TargetMBB);
+ RootMBB->addSuccessor(TargetMBB);
+ fixPHIsInSucc(TargetMBB, &MBB, RootMBB);
+ RootMBB->erase(UncondBrI);
+ } else {
+ replaceBrDest(RootMBB, RootMBBInfo->TBB, TargetMBB);
+ }
+
+ // Fix RootMBB's CmpValue to MBB's CmpValue. Don't set the immediate directly;
+ // move MBB's compare instruction here instead, as the opcode might be different.
+ if (RootMBBInfo->CmpValue != MBBInfo->CmpValue) {
+ MachineInstr *NewCmp = MBBInfo->CmpInstr;
+ NewCmp->removeFromParent();
+ RootMBB->insert(RootMBBInfo->CmpInstr, NewCmp);
+ RootMBBInfo->CmpInstr->eraseFromParent();
+ }
+
+ // Fix branch Probabilities.
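+ // Walk the path from PredMBB toward RootMBB, peeling the probability of
+ // eventually reaching TargetMBB out of each edge on the path and assigning
+ // it to the new RootMBB->TargetMBB edge.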
+ auto fixBranchProb = [&](MachineBasicBlock *NextMBB) {
+ BranchProbability Prob;
+ for (auto &I : BranchPath) {
+ MachineBasicBlock *ThisMBB = I;
+ if (!ThisMBB->hasSuccessorProbabilities() ||
+ !ThisMBB->isSuccessor(NextMBB))
+ break;
+ Prob = MBPI->getEdgeProbability(ThisMBB, NextMBB);
+ if (Prob.isUnknown())
+ break;
+ TargetProb = Prob * TargetProb;
+ Prob = Prob - TargetProb;
+ setBranchProb(ThisMBB, NextMBB, Prob);
+ if (ThisMBB == RootMBB) {
+ setBranchProb(ThisMBB, TargetMBB, TargetProb);
+ }
+ ThisMBB->normalizeSuccProbs();
+ if (ThisMBB == RootMBB)
+ break;
+ NextMBB = ThisMBB;
+ }
+ return true;
+ };
+ if (CC != X86::COND_E && !TargetProb.isUnknown())
+ fixBranchProb(MBBInfo->FBB);
+
+ if (CC != X86::COND_E)
+ RemoveList.push_back(&MBB);
+
+ // Invalidate MBBInfo just in case.
+ MBBInfos[MBB.getNumber()] = nullptr;
+ MBBInfos[RootMBB->getNumber()] = nullptr;
+
+ LLVM_DEBUG(dbgs() << "After optimization:\nRootMBB is: " << *RootMBB << "\n");
+ if (BranchPath.size() > 1)
+ LLVM_DEBUG(dbgs() << "PredMBB is: " << *(BranchPath[0]) << "\n");
+}
+
+// Driver function for optimization: find the valid candidate and apply
+// the transformation.
+bool X86CondBrFolding::optimize() {
+ bool Changed = false;
+ LLVM_DEBUG(dbgs() << "***** X86CondBr Folding on Function: " << MF.getName()
+ << " *****\n");
+ // Setup data structures.
+ MBBInfos.resize(MF.getNumBlockIDs());
+ for (auto &MBB : MF)
+ MBBInfos[MBB.getNumber()] = analyzeMBB(MBB);
+
+ for (auto &MBB : MF) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
+ if (!MBBInfo || !MBBInfo->CmpBrOnly)
+ continue;
+ if (MBB.pred_size() != 1)
+ continue;
+ LLVM_DEBUG(dbgs() << "Work on MBB." << MBB.getNumber()
+ << " CmpValue: " << MBBInfo->CmpValue << "\n");
+ SmallVector<MachineBasicBlock *, 4> BranchPath;
+ if (!findPath(&MBB, BranchPath))
+ continue;
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Found one path (len=" << BranchPath.size() << "):\n");
+ int Index = 1;
+ LLVM_DEBUG(dbgs() << "Target MBB is: " << MBB << "\n");
+ for (auto I = BranchPath.rbegin(); I != BranchPath.rend(); ++I, ++Index) {
+ MachineBasicBlock *PMBB = *I;
+ TargetMBBInfo *PMBBInfo = getMBBInfo(PMBB);
+ LLVM_DEBUG(dbgs() << "Path MBB (" << Index << " of " << BranchPath.size()
+ << ") is " << *PMBB);
+ LLVM_DEBUG(dbgs() << "CC=" << PMBBInfo->BranchCode
+ << " Val=" << PMBBInfo->CmpValue
+ << " CmpBrOnly=" << PMBBInfo->CmpBrOnly << "\n\n");
+ }
+#endif
+ optimizeCondBr(MBB, BranchPath);
+ Changed = true;
+ }
+ NumFixedCondBrs += RemoveList.size();
+ for (auto MBBI : RemoveList) {
+ while (!MBBI->succ_empty())
+ MBBI->removeSuccessor(MBBI->succ_end() - 1);
+
+ MBBI->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+// Analyze instructions that generate CondCode and extract information.
+bool X86CondBrFolding::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+ int &CmpValue) {
+ unsigned SrcRegIndex = 0;
+ unsigned ValueIndex = 0;
+ switch (MI.getOpcode()) {
+ // TODO: handle test instructions.
+ default:
+ return false;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri:
+ SrcRegIndex = 0;
+ ValueIndex = 1;
+ break;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ SrcRegIndex = 1;
+ ValueIndex = 2;
+ break;
+ }
+ SrcReg = MI.getOperand(SrcRegIndex).getReg();
+ if (!MI.getOperand(ValueIndex).isImm())
+ return false;
+ CmpValue = MI.getOperand(ValueIndex).getImm();
+ return true;
+}
+
+// Analyze a candidate MBB and extract all the information needed.
+// A valid candidate has two successors.
+// It should also end with the sequence
+//  CmpInstr,
+//  CondBr,
+//  UnCondBr.
+// Return TargetMBBInfo if MBB is a valid candidate and nullptr otherwise.
+std::unique_ptr<TargetMBBInfo>
+X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) {
+ MachineBasicBlock *TBB;
+ MachineBasicBlock *FBB;
+ MachineInstr *BrInstr;
+ MachineInstr *CmpInstr;
+ X86::CondCode CC;
+ unsigned SrcReg;
+ int CmpValue;
+ bool Modified;
+ bool CmpBrOnly;
+
+ if (MBB.succ_size() != 2)
+ return nullptr;
+
+ CmpBrOnly = true;
+ FBB = TBB = nullptr;
+ CmpInstr = nullptr;
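+ // Scan the block backwards looking for the unconditional branch, the
+ // conditional branch, and the compare that sets EFLAGS; stop at the first
+ // unrelated instruction.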
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() == X86::JMP_1) {
+ if (FBB)
+ return nullptr;
+ FBB = I->getOperand(0).getMBB();
+ continue;
+ }
+ if (I->isBranch()) {
+ if (TBB)
+ return nullptr;
+ CC = X86::getCondFromBranchOpc(I->getOpcode());
+ switch (CC) {
+ default:
+ return nullptr;
+ case X86::COND_E:
+ case X86::COND_L:
+ case X86::COND_G:
+ case X86::COND_NE:
+ case X86::COND_LE:
+ case X86::COND_GE:
+ break;
+ }
+ TBB = I->getOperand(0).getMBB();
+ BrInstr = &*I;
+ continue;
+ }
+ if (analyzeCompare(*I, SrcReg, CmpValue)) {
+ if (CmpInstr)
+ return nullptr;
+ CmpInstr = &*I;
+ continue;
+ }
+ CmpBrOnly = false;
+ break;
+ }
+
+ if (!TBB || !FBB || !CmpInstr)
+ return nullptr;
+
+ // Simplify CondCode. Note this is only to simplify the findPath logic
+ // and will not change the instruction here.
+ switch (CC) {
+ case X86::COND_NE:
+ CC = X86::COND_E;
+ std::swap(TBB, FBB);
+ Modified = true;
+ break;
+ case X86::COND_LE:
+ if (CmpValue == INT_MAX)
+ return nullptr;
+ CC = X86::COND_L;
+ CmpValue += 1;
+ Modified = true;
+ break;
+ case X86::COND_GE:
+ if (CmpValue == INT_MIN)
+ return nullptr;
+ CC = X86::COND_G;
+ CmpValue -= 1;
+ Modified = true;
+ break;
+ default:
+ Modified = false;
+ break;
+ }
+ return llvm::make_unique<TargetMBBInfo>(TargetMBBInfo{
+ TBB, FBB, BrInstr, CmpInstr, CC, SrcReg, CmpValue, Modified, CmpBrOnly});
+}
+
+bool X86CondBrFoldingPass::runOnMachineFunction(MachineFunction &MF) {
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ if (!ST.threewayBranchProfitable())
+ return false;
+ const X86InstrInfo *TII = ST.getInstrInfo();
+ const MachineBranchProbabilityInfo *MBPI =
+ &getAnalysis<MachineBranchProbabilityInfo>();
+
+ X86CondBrFolding CondBr(TII, MBPI, MF);
+ return CondBr.optimize();
+}
diff --git a/contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp b/contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
new file mode 100644
index 000000000000..3654bf04f4e9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -0,0 +1,156 @@
+//===- X86DiscriminateMemOps.cpp - Unique IDs for Mem Ops -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass aids profile-driven cache prefetch insertion by ensuring all
+/// instructions that have a memory operand are distinguishable from each other.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-discriminate-memops"
+
+namespace {
+
+using Location = std::pair<StringRef, unsigned>;
+
+Location diToLocation(const DILocation *Loc) {
+ return std::make_pair(Loc->getFilename(), Loc->getLine());
+}
+
+/// Ensure each instruction having a memory operand has a distinct <LineNumber,
+/// Discriminator> pair.
+void updateDebugInfo(MachineInstr *MI, const DILocation *Loc) {
+ DebugLoc DL(Loc);
+ MI->setDebugLoc(DL);
+}
+
+class X86DiscriminateMemOps : public MachineFunctionPass {
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override {
+ return "X86 Discriminate Memory Operands";
+ }
+
+public:
+ static char ID;
+
+ /// Default construct and initialize the pass.
+ X86DiscriminateMemOps();
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char X86DiscriminateMemOps::ID = 0;
+
+/// Default construct and initialize the pass.
+X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {}
+
+bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
+ DISubprogram *FDI = MF.getFunction().getSubprogram();
+ if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling())
+ return false;
+
+ // Have a default DILocation in case we find instructions with memops that
+ // don't have any debug info.
+ const DILocation *ReferenceDI =
+ DILocation::get(FDI->getContext(), FDI->getLine(), 0, FDI);
+
+ DenseMap<Location, unsigned> MemOpDiscriminators;
+ MemOpDiscriminators[diToLocation(ReferenceDI)] = 0;
+
+ // Figure out the largest discriminator issued for each Location. When we
+ // issue new discriminators, we can thus avoid issuing discriminators
+ // belonging to instructions that don't have memops. This isn't a requirement
+ // for the goals of this pass; however, it avoids unnecessary ambiguity.
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ const auto &DI = MI.getDebugLoc();
+ if (!DI)
+ continue;
+ Location Loc = diToLocation(DI);
+ MemOpDiscriminators[Loc] =
+ std::max(MemOpDiscriminators[Loc], DI->getBaseDiscriminator());
+ }
+ }
+
+ // Keep track of the discriminators seen at each Location. If an instruction's
+ // DebugInfo has a Location and discriminator we've already seen, replace its
+ // discriminator with a new one, to guarantee uniqueness.
+ DenseMap<Location, DenseSet<unsigned>> Seen;
+
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (X86II::getMemoryOperandNo(MI.getDesc().TSFlags) < 0)
+ continue;
+ const DILocation *DI = MI.getDebugLoc();
+ if (!DI) {
+ DI = ReferenceDI;
+ }
+ Location L = diToLocation(DI);
+ DenseSet<unsigned> &Set = Seen[L];
+ const std::pair<DenseSet<unsigned>::iterator, bool> TryInsert =
+ Set.insert(DI->getBaseDiscriminator());
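+ // If the (Location, base discriminator) pair was already seen, re-encode
+ // this instruction with the next unused base discriminator for its Location.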
+ if (!TryInsert.second) {
+ unsigned BF, DF, CI = 0;
+ DILocation::decodeDiscriminator(DI->getDiscriminator(), BF, DF, CI);
+ Optional<unsigned> EncodedDiscriminator = DILocation::encodeDiscriminator(
+ MemOpDiscriminators[L] + 1, DF, CI);
+
+ if (!EncodedDiscriminator) {
+ // FIXME(mtrofin): The assumption is that this scenario is infrequent/OK
+ // not to support. If evidence points otherwise, we can explore synthesizing
+ // unique DIs by adding fake line numbers, or by constructing 64-bit
+ // discriminators.
+ LLVM_DEBUG(dbgs() << "Unable to create a unique discriminator "
+ "for instruction with memory operand in: "
+ << DI->getFilename() << " Line: " << DI->getLine()
+ << " Column: " << DI->getColumn()
+ << ". This is likely due to a large macro expansion. \n");
+ continue;
+ }
+ // Since we were able to encode, bump the MemOpDiscriminators.
+ ++MemOpDiscriminators[L];
+ DI = DI->cloneWithDiscriminator(EncodedDiscriminator.getValue());
+ updateDebugInfo(&MI, DI);
+ Changed = true;
+ std::pair<DenseSet<unsigned>::iterator, bool> MustInsert =
+ Set.insert(DI->getBaseDiscriminator());
+ (void)MustInsert; // Silence warning in release build.
+ assert(MustInsert.second && "New discriminator shouldn't be present in set");
+ }
+
+ // Bump the reference DI to avoid cramming discriminators on line 0.
+ // FIXME(mtrofin): pin ReferenceDI on blocks or first instruction with DI
+ // in a block. It's more consistent than just relying on the last memop
+ // instruction we happened to see.
+ ReferenceDI = DI;
+ }
+ }
+ return Changed;
+}
+
+FunctionPass *llvm::createX86DiscriminateMemOpsPass() {
+ return new X86DiscriminateMemOps();
+}
diff --git a/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp b/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 62588e9509d3..d9ebbb506ca4 100644
--- a/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -31,10 +31,6 @@
using namespace llvm;
-namespace llvm {
-void initializeX86DomainReassignmentPass(PassRegistry &);
-}
-
#define DEBUG_TYPE "x86-domain-reassignment"
STATISTIC(NumClosuresConverted, "Number of closures converted by the pass");
@@ -736,7 +732,10 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
STI = &MF.getSubtarget<X86Subtarget>();
// GPR->K is the only transformation currently supported, bail out early if no
// AVX512.
- if (!STI->hasAVX512())
+ // TODO: We're also bailing out if AVX512BW isn't supported since we use VK32 and
+ // VK64 for GR32/GR64, but those aren't legal classes on KNL. If the register
+ // coalescer doesn't clean it up and we generate a spill, we will crash.
+ if (!STI->hasAVX512() || !STI->hasBWI())
return false;
MRI = &MF.getRegInfo();
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index d082b42eefa9..9dd3f2652543 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1278,7 +1278,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
- unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
RetRegs.push_back(RetReg);
@@ -2900,23 +2900,15 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
- bool UseIncDec = false;
- if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
- UseIncDec = true;
-
unsigned BaseOpc, CondOpc;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
- CondOpc = X86::SETOr;
- break;
+ BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
case Intrinsic::uadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
case Intrinsic::ssub_with_overflow:
- BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
- CondOpc = X86::SETOr;
- break;
+ BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
@@ -2938,9 +2930,11 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
};
- if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
+ if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
+ CondOpc == X86::SETOr) {
+ // We can use INC/DEC.
ResultReg = createResultReg(TLI.getRegClassFor(VT));
- bool IsDec = BaseOpc == X86ISD::DEC;
+ bool IsDec = BaseOpc == ISD::SUB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
@@ -3222,8 +3216,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
(CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
return false;
- // Functions using retpoline should use SDISel for calls.
- if (Subtarget->useRetpoline())
+ // Functions using retpoline for indirect calls need to use SDISel.
+ if (Subtarget->useRetpolineIndirectCalls())
return false;
// Handle only C, fastcc, and webkit_js calling conventions for now.
@@ -3734,9 +3728,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
case MVT::i1:
- // TODO: Support this properly.
- if (Subtarget->hasAVX512())
- return 0;
VT = MVT::i8;
LLVM_FALLTHROUGH;
case MVT::i8: Opc = X86::MOV8ri; break;
@@ -3744,7 +3735,7 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
case MVT::i32: Opc = X86::MOV32ri; break;
case MVT::i64: {
if (isUInt<32>(Imm))
- Opc = X86::MOV32ri;
+ Opc = X86::MOV32ri64;
else if (isInt<32>(Imm))
Opc = X86::MOV64ri32;
else
@@ -3752,14 +3743,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
break;
}
}
- if (VT == MVT::i64 && Opc == X86::MOV32ri) {
- unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
- unsigned ResultReg = createResultReg(&X86::GR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
- .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
- return ResultReg;
- }
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
@@ -4009,7 +3992,8 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
}
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
return true;
}
diff --git a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index d9bf60c2c9fb..ed297e678203 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -288,7 +288,7 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
for (unsigned i = 1; i < NumArgs; ++i)
MIB.add(MI->getOperand(i));
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
return MIB;
}
diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
index f3f7f6a37360..a346085a52cb 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -25,10 +25,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace llvm {
-void initializeFixupLEAPassPass(PassRegistry &);
-}
-
#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"
@@ -43,8 +39,8 @@ class FixupLEAPass : public MachineFunctionPass {
/// Loop over all of the instructions in the basic block
/// replacing applicable instructions with LEA instructions,
/// where appropriate.
- bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
-
+ bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI,
+ bool IsSlowLEA, bool IsSlow3OpsLEA);
/// Given a machine register, look for the instruction
/// which writes it in the current basic block. If found,
@@ -62,10 +58,9 @@ class FixupLEAPass : public MachineFunctionPass {
MachineFunction::iterator MFI);
/// Given a LEA instruction which is unprofitable
- /// on Silvermont try to replace it with an equivalent ADD instruction
- void processInstructionForSLM(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI);
-
+ /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
+ void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
/// Given a LEA instruction which is unprofitable
/// on SNB+ try to replace it with other instructions.
@@ -197,8 +192,11 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
+ bool IsSlowLEA = ST.slowLEA();
+ bool IsSlow3OpsLEA = ST.slow3OpsLEA();
+
OptIncDec = !ST.slowIncDec() || Func.getFunction().optForMinSize();
- OptLEA = ST.LEAusesAG() || ST.slowLEA() || ST.slow3OpsLEA();
+ OptLEA = ST.LEAusesAG() || IsSlowLEA || IsSlow3OpsLEA;
if (!OptLEA && !OptIncDec)
return false;
@@ -209,7 +207,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
// Process all basic blocks.
for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I)
- processBasicBlock(Func, I);
+ processBasicBlock(Func, I, IsSlowLEA, IsSlow3OpsLEA);
LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
return true;
@@ -278,14 +276,16 @@ static inline bool isLEA(const int Opcode) {
}
static inline bool isInefficientLEAReg(unsigned int Reg) {
- return Reg == X86::EBP || Reg == X86::RBP || Reg == X86::R13;
+ return Reg == X86::EBP || Reg == X86::RBP ||
+ Reg == X86::R13D || Reg == X86::R13;
}
static inline bool isRegOperand(const MachineOperand &Op) {
return Op.isReg() && Op.getReg() != X86::NoRegister;
}
-/// hasIneffecientLEARegs - LEA that uses base and index registers
-/// where the base is EBP, RBP, or R13
+
+/// Returns true if this LEA uses base and index registers, and the base register
+/// is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
@@ -333,13 +333,12 @@ static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
static inline bool isLEASimpleIncOrDec(MachineInstr &LEA) {
unsigned SrcReg = LEA.getOperand(1 + X86::AddrBaseReg).getReg();
unsigned DstReg = LEA.getOperand(0).getReg();
- unsigned AddrDispOp = 1 + X86::AddrDisp;
+ const MachineOperand &AddrDisp = LEA.getOperand(1 + X86::AddrDisp);
return SrcReg == DstReg &&
LEA.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
LEA.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
- LEA.getOperand(AddrDispOp).isImm() &&
- (LEA.getOperand(AddrDispOp).getImm() == 1 ||
- LEA.getOperand(AddrDispOp).getImm() == -1);
+ AddrDisp.isImm() &&
+ (AddrDisp.getImm() == 1 || AddrDisp.getImm() == -1);
}
bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
@@ -351,7 +350,7 @@ bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(*MFI, I)) {
int NewOpcode;
- bool isINC = MI.getOperand(4).getImm() == 1;
+ bool isINC = MI.getOperand(1 + X86::AddrDisp).getImm() == 1;
switch (Opcode) {
case X86::LEA16r:
NewOpcode = isINC ? X86::INC16r : X86::DEC16r;
@@ -368,7 +367,7 @@ bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
MachineInstr *NewMI =
BuildMI(*MFI, I, MI.getDebugLoc(), TII->get(NewOpcode))
.add(MI.getOperand(0))
- .add(MI.getOperand(1));
+ .add(MI.getOperand(1 + X86::AddrBaseReg));
MFI->erase(I);
I = static_cast<MachineBasicBlock::iterator>(NewMI);
return true;
@@ -414,21 +413,29 @@ void FixupLEAPass::seekLEAFixup(MachineOperand &p,
}
}
-void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI) {
+void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) {
MachineInstr &MI = *I;
const int Opcode = MI.getOpcode();
if (!isLEA(Opcode))
return;
- if (MI.getOperand(5).getReg() != 0 || !MI.getOperand(4).isImm() ||
+
+ const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
+ const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
+ const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
+ const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
+ const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
+
+ if (Segment.getReg() != 0 || !Offset.isImm() ||
!TII->isSafeToClobberEFLAGS(*MFI, I))
return;
- const unsigned DstR = MI.getOperand(0).getReg();
- const unsigned SrcR1 = MI.getOperand(1).getReg();
- const unsigned SrcR2 = MI.getOperand(3).getReg();
+ const unsigned DstR = Dst.getReg();
+ const unsigned SrcR1 = Base.getReg();
+ const unsigned SrcR2 = Index.getReg();
if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
return;
- if (MI.getOperand(2).getImm() > 1)
+ if (Scale.getImm() > 1)
return;
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
@@ -436,19 +443,19 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
// Make ADD instruction for two registers writing to LEA's destination
if (SrcR1 != 0 && SrcR2 != 0) {
const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
- const MachineOperand &Src = MI.getOperand(SrcR1 == DstR ? 3 : 1);
+ const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
NewMI =
BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
LLVM_DEBUG(NewMI->dump(););
}
// Make ADD instruction for immediate
- if (MI.getOperand(4).getImm() != 0) {
+ if (Offset.getImm() != 0) {
const MCInstrDesc &ADDri =
- TII->get(getADDriFromLEA(Opcode, MI.getOperand(4)));
- const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 1 : 3);
+ TII->get(getADDriFromLEA(Opcode, Offset));
+ const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR)
.add(SrcR)
- .addImm(MI.getOperand(4).getImm());
+ .addImm(Offset.getImm());
LLVM_DEBUG(NewMI->dump(););
}
if (NewMI) {
@@ -465,12 +472,12 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
if (!isLEA(LEAOpcode))
return nullptr;
- const MachineOperand &Dst = MI.getOperand(0);
- const MachineOperand &Base = MI.getOperand(1);
- const MachineOperand &Scale = MI.getOperand(2);
- const MachineOperand &Index = MI.getOperand(3);
- const MachineOperand &Offset = MI.getOperand(4);
- const MachineOperand &Segment = MI.getOperand(5);
+ const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
+ const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
+ const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
+ const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
+ const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
if (!(TII->isThreeOperandsLEA(MI) ||
hasInefficientLEABaseReg(Base, Index)) ||
@@ -570,26 +577,28 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
}
bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
- MachineFunction::iterator MFI) {
-
+ MachineFunction::iterator MFI,
+ bool IsSlowLEA, bool IsSlow3OpsLEA) {
for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
if (OptIncDec)
if (fixupIncDec(I, MFI))
continue;
if (OptLEA) {
- if (MF.getSubtarget<X86Subtarget>().slowLEA())
- processInstructionForSLM(I, MFI);
-
- else {
- if (MF.getSubtarget<X86Subtarget>().slow3OpsLEA()) {
- if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
- MFI->erase(I);
- I = NewMI;
- }
- } else
- processInstruction(I, MFI);
+ if (IsSlowLEA) {
+ processInstructionForSlowLEA(I, MFI);
+ continue;
}
+
+ if (IsSlow3OpsLEA) {
+ if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
+ MFI->erase(I);
+ I = NewMI;
+ }
+ continue;
+ }
+
+ processInstruction(I, MFI);
}
}
return false;
diff --git a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index d2f2f21542a9..778aa505b2d9 100644
--- a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -1053,7 +1053,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
MIB.addReg(CondReg);
- MIB->setMemRefs(SetCCI.memoperands_begin(), SetCCI.memoperands_end());
+ MIB.setMemRefs(SetCCI.memoperands());
SetCCI.eraseFromParent();
return;
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index e207c343fac8..984db12201ed 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -185,7 +185,8 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
}
for (auto CS : AvailableRegs)
- if (!Uses.count(CS) && CS != X86::RIP)
+ if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP &&
+ CS != X86::ESP)
return CS;
}
}
@@ -765,7 +766,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
// FIXME: Add retpoline support and remove this.
- if (Is64Bit && IsLargeCodeModel && STI.useRetpoline())
+ if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
report_fatal_error("Emitting stack probe calls on 64-bit with the large "
"code model and retpoline not yet implemented.");
@@ -1103,15 +1104,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
NumBytes = alignTo(NumBytes, MaxAlign);
- // Get the offset of the stack slot for the EBP register, which is
- // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
- // Update the frame offset adjustment.
- if (!IsFunclet)
- MFI.setOffsetAdjustment(-NumBytes);
- else
- assert(MFI.getOffsetAdjustment() == -(int)NumBytes &&
- "should calculate same local variable offset for funclets");
-
// Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
.addReg(MachineFramePtr, RegState::Kill)
@@ -1167,6 +1159,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
+ // Update the offset adjustment, which is mainly used by codeview to translate
+ // from ESP to VFRAME relative local variable offsets.
+ if (!IsFunclet) {
+ if (HasFP && TRI->needsStackRealignment(MF))
+ MFI.setOffsetAdjustment(-NumBytes);
+ else
+ MFI.setOffsetAdjustment(-StackSize);
+ }
+
// For EH funclets, only allocate enough space for outgoing calls. Save the
// NumBytes value that we would've used for the parent frame.
unsigned ParentFrameNumBytes = NumBytes;
@@ -1208,6 +1209,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
+
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
+ .addImm(MaxAlign)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
// If there is an SUB32ri of ESP immediately before this instruction, merge
@@ -1983,6 +1991,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
}
X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
+ MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
// Assign slots for XMMs.
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -2262,9 +2271,15 @@ void X86FrameLowering::adjustForSegmentedStacks(
// Do not generate a prologue for leaf functions with a stack of size zero.
// For non-leaf functions we have to allow for the possibility that the
- // call is to a non-split function, as in PR37807.
- if (StackSize == 0 && !MFI.hasTailCall())
+ // call is to a non-split function, as in PR37807. This function could also
+ // take the address of a non-split function. When the linker tries to adjust
+ // its non-existent prologue, it would fail with an error. Mark the object
+ // file so that such failures are not errors. See this Go language bug-report
+ // https://go-review.googlesource.com/c/go/+/148819/
+ if (StackSize == 0 && !MFI.hasTailCall()) {
+ MF.getMMI().setHasNosplitStack(true);
return;
+ }
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
@@ -2437,7 +2452,7 @@ void X86FrameLowering::adjustForSegmentedStacks(
// is laid out within 2^31 bytes of each function body, but this seems
// to be sufficient for JIT.
// FIXME: Add retpoline support and remove the error here..
- if (STI.useRetpoline())
+ if (STI.useRetpolineIndirectCalls())
report_fatal_error("Emitting morestack calls on 64-bit with the large "
"code model and retpoline not yet implemented.");
BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
@@ -2463,8 +2478,8 @@ void X86FrameLowering::adjustForSegmentedStacks(
allocMBB->addSuccessor(&PrologueMBB);
- checkMBB->addSuccessor(allocMBB);
- checkMBB->addSuccessor(&PrologueMBB);
+ checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
+ checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
#ifdef EXPENSIVE_CHECKS
MF.verify();
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index a28d4eac8393..5ac153244df9 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -41,6 +41,10 @@ using namespace llvm;
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
+ cl::desc("Enable setting constant bits to reduce size of mask immediates"),
+ cl::Hidden);
+
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//
@@ -161,6 +165,9 @@ namespace {
/// If true, selector should try to optimize for minimum code size.
bool OptForMinSize;
+ /// Disable direct TLS access through segment registers.
+ bool IndirectTlsSegRefs;
+
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), OptForSize(false),
@@ -173,6 +180,8 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.
Subtarget = &MF.getSubtarget<X86Subtarget>();
+ IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
+ "indirect-tls-seg-refs");
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -235,12 +244,6 @@ namespace {
return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
}
- // Try to fold a vector load. This makes sure the load isn't non-temporal.
- bool tryFoldVecLoad(SDNode *Root, SDNode *P, SDValue N,
- SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp,
- SDValue &Segment);
-
/// Implement addressing mode selection for inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
@@ -443,6 +446,9 @@ namespace {
switch (StoreSize) {
default: llvm_unreachable("Unsupported store size");
+ case 4:
+ case 8:
+ return false;
case 16:
return Subtarget->hasSSE41();
case 32:
@@ -453,15 +459,23 @@ namespace {
}
bool foldLoadStoreIntoMemOperand(SDNode *Node);
- bool matchBEXTRFromAnd(SDNode *Node);
+ MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
+ bool matchBitExtract(SDNode *Node);
bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;
+ bool tryShiftAmountMod(SDNode *N);
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);
MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node,
SDValue &InFlag);
+
+ bool tryOptimizeRem8Extend(SDNode *N);
+
+ bool onlyUsesZeroFlag(SDValue Flags) const;
+ bool hasNoSignFlagUses(SDValue Flags) const;
+ bool hasNoCarryFlagUses(SDValue Flags) const;
};
}
@@ -512,12 +526,18 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if (N.getOpcode() != ISD::LOAD)
return true;
+ // Don't fold non-temporal loads if we have an instruction for them.
+ if (useNonTemporalLoad(cast<LoadSDNode>(N)))
+ return false;
+
// If N is a load, do additional profitability checks.
if (U == Root) {
switch (U->getOpcode()) {
default: break;
case X86ISD::ADD:
+ case X86ISD::ADC:
case X86ISD::SUB:
+ case X86ISD::SBB:
case X86ISD::AND:
case X86ISD::XOR:
case X86ISD::OR:
@@ -724,7 +744,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target can fold the load into the call or
// jmp.
- !Subtarget->useRetpoline() &&
+ !Subtarget->useRetpolineIndirectCalls() &&
((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
(Subtarget->is64Bit() ||
@@ -827,24 +847,144 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
}
}
+// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
+bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
+ Opc != X86::MOVSX64rr8)
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+
+ // We need to be extracting the low 8 bits of an extend.
+ if (!N0.isMachineOpcode() ||
+ N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
+ N0.getConstantOperandVal(1) != X86::sub_8bit)
+ return false;
+
+ // We're looking for either a movsx or movzx to match the original opcode.
+ unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
+ : X86::MOVSX32rr8_NOREX;
+ SDValue N00 = N0.getOperand(0);
+ if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
+ return false;
+
+ if (Opc == X86::MOVSX64rr8) {
+ // If we had a sign extend from 8 to 64 bits, we still need to go from 32
+ // to 64.
+ MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
+ MVT::i64, N00);
+ ReplaceUses(N, Extend);
+ } else {
+ // Ok we can drop this extend and just use the original extend.
+ ReplaceUses(N, N00.getNode());
+ }
+
+ return true;
+}
void X86DAGToDAGISel::PostprocessISelDAG() {
// Skip peepholes at -O0.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
- // Attempt to remove vectors moves that were inserted to zero upper bits.
-
- SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
- ++Position;
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+ bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
- if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG)
+ if (tryOptimizeRem8Extend(N)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Look for a TESTrr+ANDrr pattern where both operands of the test are
+ // the same. Rewrite to remove the AND.
+ unsigned Opc = N->getMachineOpcode();
+ if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
+ Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
+ N->getOperand(0) == N->getOperand(1) &&
+ N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+ N->getOperand(0).isMachineOpcode()) {
+ SDValue And = N->getOperand(0);
+ unsigned N0Opc = And.getMachineOpcode();
+ if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
+ N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
+ MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
+ MVT::i32,
+ And.getOperand(0),
+ And.getOperand(1));
+ ReplaceUses(N, Test);
+ MadeChange = true;
+ continue;
+ }
+ if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
+ N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
+ unsigned NewOpc;
+ switch (N0Opc) {
+ case X86::AND8rm: NewOpc = X86::TEST8mr; break;
+ case X86::AND16rm: NewOpc = X86::TEST16mr; break;
+ case X86::AND32rm: NewOpc = X86::TEST32mr; break;
+ case X86::AND64rm: NewOpc = X86::TEST64mr; break;
+ }
+
+ // Need to swap the memory and register operand.
+ SDValue Ops[] = { And.getOperand(1),
+ And.getOperand(2),
+ And.getOperand(3),
+ And.getOperand(4),
+ And.getOperand(5),
+ And.getOperand(0),
+ And.getOperand(6) /* Chain */ };
+ MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
+ MVT::i32, MVT::Other, Ops);
+ ReplaceUses(N, Test);
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
+ // used. We're doing this late so we can prefer to fold the AND into masked
+ // comparisons. Doing that can be better for the live range of the mask
+ // register.
+ if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
+ Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
+ N->getOperand(0) == N->getOperand(1) &&
+ N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+ N->getOperand(0).isMachineOpcode() &&
+ onlyUsesZeroFlag(SDValue(N, 0))) {
+ SDValue And = N->getOperand(0);
+ unsigned N0Opc = And.getMachineOpcode();
+ // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
+ // KAND instructions and KTEST use the same ISA feature.
+ if (N0Opc == X86::KANDBrr ||
+ (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
+ N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
+ unsigned NewOpc;
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
+ case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
+ case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
+ case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
+ }
+ MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
+ MVT::i32,
+ And.getOperand(0),
+ And.getOperand(1));
+ ReplaceUses(N, KTest);
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // Attempt to remove vectors moves that were inserted to zero upper bits.
+ if (Opc != TargetOpcode::SUBREG_TO_REG)
continue;
unsigned SubRegIdx = N->getConstantOperandVal(2);
@@ -881,14 +1021,22 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
continue;
+ // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
+ // the SHA instructions which use a legacy encoding.
+ uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
+ if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
+ (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
+ (TSFlags & X86II::EncodingMask) != X86II::XOP)
+ continue;
+
// Producing instruction is another vector instruction. We can drop the
// move.
CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
-
- // If the move is now dead, delete it.
- if (Move.getNode()->use_empty())
- CurDAG->RemoveDeadNode(Move.getNode());
+ MadeChange = true;
}
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
}
@@ -964,6 +1112,7 @@ bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
+ !IndirectTlsSegRefs &&
(Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetFuchsia()))
switch (N->getPointerInfo().getAddrSpace()) {
@@ -1291,8 +1440,7 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
}
APInt MaskedHighBits =
APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
- KnownBits Known;
- DAG.computeKnownBits(X, Known);
+ KnownBits Known = DAG.computeKnownBits(X);
if (MaskedHighBits != Known.Zero) return true;
// We've identified a pattern that can be transformed into a single shift
@@ -1327,6 +1475,64 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
return false;
}
+// Transform "(X >> SHIFT) & (MASK << C1)" to
+// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
+// matched to a BEXTR later. Returns false if the simplification is performed.
+static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM,
+ const X86Subtarget &Subtarget) {
+ if (Shift.getOpcode() != ISD::SRL ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)) ||
+ !Shift.hasOneUse() || !N.hasOneUse())
+ return true;
+
+ // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
+ if (!Subtarget.hasTBM() &&
+ !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
+ return true;
+
+ // We need to ensure that the mask is a contiguous run of bits.
+ if (!isShiftedMask_64(Mask)) return true;
+
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+
+ // The amount of shift we're trying to fit into the addressing mode is taken
+ // from the trailing zeros of the mask.
+ unsigned AMShiftAmt = countTrailingZeros(Mask);
+
+ // There is nothing we can do here unless the mask is removing some bits.
+ // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
+ if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
+
+ MVT VT = N.getSimpleValueType();
+ SDLoc DL(N);
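+ // Build ((X >> (ShiftAmt + AMShiftAmt)) & (Mask >> AMShiftAmt)) << AMShiftAmt;
+ // the trailing SHL is then folded into the addressing mode as the scale,
+ // with the AND as the index register.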
+ SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
+ SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
+ SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
+ SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ insertDAGNode(DAG, N, NewSRLAmt);
+ insertDAGNode(DAG, N, NewSRL);
+ insertDAGNode(DAG, N, NewMask);
+ insertDAGNode(DAG, N, NewAnd);
+ insertDAGNode(DAG, N, NewSHLAmt);
+ insertDAGNode(DAG, N, NewSHL);
+ DAG.ReplaceAllUsesWith(N, NewSHL);
+
+ AM.Scale = 1 << AMShiftAmt;
+ AM.IndexReg = NewAnd;
+ return false;
+}
+
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
SDLoc dl(N);
@@ -1607,6 +1813,11 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// a scale on the outside of the mask.
if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
return false;
+
+ // Try to fold the mask and shift into BEXTR and scale.
+ if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+ return false;
+
break;
}
}
@@ -2039,20 +2250,6 @@ bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
-bool X86DAGToDAGISel::tryFoldVecLoad(SDNode *Root, SDNode *P, SDValue N,
- SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp,
- SDValue &Segment) {
- if (!ISD::isNON_EXTLoad(N.getNode()) ||
- useNonTemporalLoad(cast<LoadSDNode>(N)) ||
- !IsProfitableToFold(N, P, Root) ||
- !IsLegalToFold(N, P, Root, OptLevel))
- return false;
-
- return selectAddr(N.getNode(),
- N.getOperand(1), Base, Scale, Index, Disp, Segment);
-}
-
/// Return an SDNode that returns the value of the global base register.
/// Output instructions required to initialize the global base register,
/// if necessary.
@@ -2077,18 +2274,30 @@ bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
CR->getSignedMax().slt(1ull << Width);
}
-/// Test whether the given X86ISD::CMP node has any uses which require the SF
-/// or OF bits to be accurate.
-static bool hasNoSignedComparisonUses(SDNode *N) {
+static X86::CondCode getCondFromOpc(unsigned Opc) {
+ X86::CondCode CC = X86::COND_INVALID;
+ if (CC == X86::COND_INVALID)
+ CC = X86::getCondFromBranchOpc(Opc);
+ if (CC == X86::COND_INVALID)
+ CC = X86::getCondFromSETOpc(Opc);
+ if (CC == X86::COND_INVALID)
+ CC = X86::getCondFromCMovOpc(Opc);
+
+ return CC;
+}
+
+/// Test whether the given X86ISD::CMP node has any users that use a flag
+/// other than ZF.
+bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
// Examine each user of the node.
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end(); UI != UE; ++UI) {
- // Only examine CopyToReg uses.
- if (UI->getOpcode() != ISD::CopyToReg)
- return false;
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ // Only check things that use the flags.
+ if (UI.getUse().getResNo() != Flags.getResNo())
+ continue;
// Only examine CopyToReg uses that copy to EFLAGS.
- if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
- X86::EFLAGS)
+ if (UI->getOpcode() != ISD::CopyToReg ||
+ cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
return false;
// Examine each user of the CopyToReg use.
for (SDNode::use_iterator FlagUI = UI->use_begin(),
@@ -2097,105 +2306,52 @@ static bool hasNoSignedComparisonUses(SDNode *N) {
if (FlagUI.getUse().getResNo() != 1) continue;
// Anything unusual: assume conservatively.
if (!FlagUI->isMachineOpcode()) return false;
- // Examine the opcode of the user.
- switch (FlagUI->getMachineOpcode()) {
- // These comparisons don't treat the most significant bit specially.
- case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
- case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
- case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
- case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
- case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
- case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
- case X86::CMOVA16rr: case X86::CMOVA16rm:
- case X86::CMOVA32rr: case X86::CMOVA32rm:
- case X86::CMOVA64rr: case X86::CMOVA64rm:
- case X86::CMOVAE16rr: case X86::CMOVAE16rm:
- case X86::CMOVAE32rr: case X86::CMOVAE32rm:
- case X86::CMOVAE64rr: case X86::CMOVAE64rm:
- case X86::CMOVB16rr: case X86::CMOVB16rm:
- case X86::CMOVB32rr: case X86::CMOVB32rm:
- case X86::CMOVB64rr: case X86::CMOVB64rm:
- case X86::CMOVBE16rr: case X86::CMOVBE16rm:
- case X86::CMOVBE32rr: case X86::CMOVBE32rm:
- case X86::CMOVBE64rr: case X86::CMOVBE64rm:
- case X86::CMOVE16rr: case X86::CMOVE16rm:
- case X86::CMOVE32rr: case X86::CMOVE32rm:
- case X86::CMOVE64rr: case X86::CMOVE64rm:
- case X86::CMOVNE16rr: case X86::CMOVNE16rm:
- case X86::CMOVNE32rr: case X86::CMOVNE32rm:
- case X86::CMOVNE64rr: case X86::CMOVNE64rm:
- case X86::CMOVNP16rr: case X86::CMOVNP16rm:
- case X86::CMOVNP32rr: case X86::CMOVNP32rm:
- case X86::CMOVNP64rr: case X86::CMOVNP64rm:
- case X86::CMOVP16rr: case X86::CMOVP16rm:
- case X86::CMOVP32rr: case X86::CMOVP32rm:
- case X86::CMOVP64rr: case X86::CMOVP64rm:
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ switch (CC) {
+ // Comparisons which only use the zero flag.
+ case X86::COND_E: case X86::COND_NE:
continue;
// Anything else: assume conservatively.
- default: return false;
+ default:
+ return false;
}
}
}
return true;
}
-/// Test whether the given node which sets flags has any uses which require the
-/// CF flag to be accurate.
-static bool hasNoCarryFlagUses(SDNode *N) {
+/// Test whether the given X86ISD::CMP node has any uses which require the SF
+/// flag to be accurate.
+bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
// Examine each user of the node.
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
- ++UI) {
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
// Only check things that use the flags.
- if (UI.getUse().getResNo() != 1)
+ if (UI.getUse().getResNo() != Flags.getResNo())
continue;
- // Only examine CopyToReg uses.
- if (UI->getOpcode() != ISD::CopyToReg)
- return false;
// Only examine CopyToReg uses that copy to EFLAGS.
- if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
+ if (UI->getOpcode() != ISD::CopyToReg ||
+ cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
return false;
// Examine each user of the CopyToReg use.
- for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
- FlagUI != FlagUE; ++FlagUI) {
+ for (SDNode::use_iterator FlagUI = UI->use_begin(),
+ FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
// Only examine the Flag result.
- if (FlagUI.getUse().getResNo() != 1)
- continue;
+ if (FlagUI.getUse().getResNo() != 1) continue;
// Anything unusual: assume conservatively.
- if (!FlagUI->isMachineOpcode())
- return false;
- // Examine the opcode of the user.
- switch (FlagUI->getMachineOpcode()) {
- // Comparisons which don't examine the CF flag.
- case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
- case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
- case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
- case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
- case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
- case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
- case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
- case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
- case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
- case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
- case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
- case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
- case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
- case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
- case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
- case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
- case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
- case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
- case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
- case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
- case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
- case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
- case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
- case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
- case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
- case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
- case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
- case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
- case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
- case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
+ if (!FlagUI->isMachineOpcode()) return false;
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ switch (CC) {
+ // Comparisons which don't examine the SF flag.
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ case X86::COND_E: case X86::COND_NE:
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_P: case X86::COND_NP:
continue;
// Anything else: assume conservatively.
default:
@@ -2206,23 +2362,96 @@ static bool hasNoCarryFlagUses(SDNode *N) {
return true;
}
+static bool mayUseCarryFlag(X86::CondCode CC) {
+ switch (CC) {
+ // Comparisons which don't examine the CF flag.
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_E: case X86::COND_NE:
+ case X86::COND_S: case X86::COND_NS:
+ case X86::COND_P: case X86::COND_NP:
+ case X86::COND_L: case X86::COND_GE:
+ case X86::COND_G: case X86::COND_LE:
+ return false;
+ // Anything else: assume conservatively.
+ default:
+ return true;
+ }
+}
+
+/// Test whether the given node which sets flags has any uses which require the
+/// CF flag to be accurate.
+bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
+ // Examine each user of the node.
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ // Only check things that use the flags.
+ if (UI.getUse().getResNo() != Flags.getResNo())
+ continue;
+
+ unsigned UIOpc = UI->getOpcode();
+
+ if (UIOpc == ISD::CopyToReg) {
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
+ return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
+ FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1)
+ continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode())
+ return false;
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ if (mayUseCarryFlag(CC))
+ return false;
+ }
+
+ // This CopyToReg is ok. Move on to the next user.
+ continue;
+ }
+
+ // This might be an unselected node. So look for the pre-isel opcodes that
+ // use flags.
+ unsigned CCOpNo;
+ switch (UIOpc) {
+ default:
+ // Something unusual. Be conservative.
+ return false;
+ case X86ISD::SETCC: CCOpNo = 0; break;
+ case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
+ case X86ISD::CMOV: CCOpNo = 2; break;
+ case X86ISD::BRCOND: CCOpNo = 2; break;
+ }
+
+ X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
+ if (mayUseCarryFlag(CC))
+ return false;
+ }
+ return true;
+}
+
/// Check whether or not the chain ending in StoreNode is suitable for doing
/// the {load; op; store} to modify transformation.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
SDValue StoredVal, SelectionDAG *CurDAG,
+ unsigned LoadOpNo,
LoadSDNode *&LoadNode,
SDValue &InputChain) {
- // is the stored value result 0 of the load?
+ // Is the stored value result 0 of the operation?
if (StoredVal.getResNo() != 0) return false;
- // are there other uses of the loaded value than the inc or dec?
+ // Are there other uses of the operation other than the store?
if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
- // is the store non-extending and non-indexed?
+ // Is the store non-extending and non-indexed?
if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
return false;
- SDValue Load = StoredVal->getOperand(0);
+ SDValue Load = StoredVal->getOperand(LoadOpNo);
// Is the stored value a non-extending and non-indexed load?
if (!ISD::isNormalLoad(Load.getNode())) return false;
@@ -2351,26 +2580,37 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
MemVT != MVT::i8)
return false;
+
+ bool IsCommutable = false;
switch (Opc) {
default:
return false;
- case X86ISD::INC:
- case X86ISD::DEC:
- case X86ISD::ADD:
- case X86ISD::ADC:
case X86ISD::SUB:
case X86ISD::SBB:
+ break;
+ case X86ISD::ADD:
+ case X86ISD::ADC:
case X86ISD::AND:
case X86ISD::OR:
case X86ISD::XOR:
+ IsCommutable = true;
break;
}
+ unsigned LoadOpNo = 0;
LoadSDNode *LoadNode = nullptr;
SDValue InputChain;
- if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,
- InputChain))
- return false;
+ if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
+ LoadNode, InputChain)) {
+ if (!IsCommutable)
+ return false;
+
+ // This operation is commutable; try the other operand.
+ LoadOpNo = 1;
+ if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
+ LoadNode, InputChain))
+ return false;
+ }
SDValue Base, Scale, Index, Disp, Segment;
if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
@@ -2395,20 +2635,27 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
MachineSDNode *Result;
switch (Opc) {
- case X86ISD::INC:
- case X86ISD::DEC: {
- unsigned NewOpc =
- Opc == X86ISD::INC
- ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
- : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
- const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
- Result =
- CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
- break;
- }
case X86ISD::ADD:
- case X86ISD::ADC:
case X86ISD::SUB:
+ // Try to match inc/dec.
+ if (!Subtarget->slowIncDec() ||
+ CurDAG->getMachineFunction().getFunction().optForSize()) {
+ bool IsOne = isOneConstant(StoredVal.getOperand(1));
+ bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
+ // An ADD/SUB of 1/-1 whose carry flag is unused can use INC/DEC.
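+ // For example, "addq $1, (%rdi)" can become "incq (%rdi)"; INC/DEC do not
+ // update CF, which is why the carry-flag check below is required.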
+ if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
+ unsigned NewOpc =
+ ((Opc == X86ISD::ADD) == IsOne)
+ ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
+ : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
+ const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+ Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
+ MVT::Other, Ops);
+ break;
+ }
+ }
+ LLVM_FALLTHROUGH;
+ case X86ISD::ADC:
case X86ISD::SBB:
case X86ISD::AND:
case X86ISD::OR:
@@ -2488,7 +2735,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
};
unsigned NewOpc = SelectRegOpcode(Opc);
- SDValue Operand = StoredVal->getOperand(1);
+ SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
// See if the operand is a constant that we can fold into an immediate
// operand.
@@ -2503,7 +2750,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
(-OperandV).getMinSignedBits() <= 8) ||
(MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
(-OperandV).getMinSignedBits() <= 32)) &&
- hasNoCarryFlagUses(StoredVal.getNode())) {
+ hasNoCarryFlagUses(StoredVal.getValue(1))) {
OperandV = -OperandV;
Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
}
@@ -2541,10 +2788,9 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
llvm_unreachable("Invalid opcode!");
}
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
- MemOp[0] = StoreNode->getMemOperand();
- MemOp[1] = LoadNode->getMemOperand();
- Result->setMemRefs(MemOp, MemOp + 2);
+ MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
+ LoadNode->getMemOperand()};
+ CurDAG->setNodeMemRefs(Result, MemOps);
// Update Load Chain uses as well.
ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
@@ -2554,39 +2800,273 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
return true;
}
+// See if this is an X & Mask that we can match to BEXTR/BZHI,
+// where Mask is one of the following patterns:
+// a) x & (1 << nbits) - 1
+// b) x & ~(-1 << nbits)
+// c) x & (-1 >> (32 - y))
+// d) x << (32 - y) >> (32 - y)
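+// Illustrative example: for nbits == 8, patterns (a) and (b) both form the
+// mask 0xFF ((1 << 8) - 1 == ~(-1 << 8)), i.e. "keep the low 8 bits", which
+// BZHI computes directly and BEXTR computes with a start index of zero.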
+bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
+ assert(
+ (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
+ "Should be either an and-mask, or right-shift after clearing high bits.");
+
+ // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
+ if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
+ return false;
+
+ MVT NVT = Node->getSimpleValueType(0);
+
+ // Only supported for 32 and 64 bits.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ return false;
+
+ unsigned Size = NVT.getSizeInBits();
+
+ SDValue NBits;
+
+ // If we have BMI2's BZHI, we are ok with multi-use patterns.
+ // Else, if we only have BMI1's BEXTR, we require one-use.
+ const bool CanHaveExtraUses = Subtarget->hasBMI2();
+ auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
+ return CanHaveExtraUses ||
+ Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
+ };
+ auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
+ auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
+
+ // a) x & ((1 << nbits) + (-1))
+ auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+ // Match `add`. Must only have one use!
+ if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
+ return false;
+ // We should be adding all-ones constant (i.e. subtracting one.)
+ if (!isAllOnesConstant(Mask->getOperand(1)))
+ return false;
+ // Match `1 << nbits`. Must only have one use!
+ SDValue M0 = Mask->getOperand(0);
+ if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
+ return false;
+ if (!isOneConstant(M0->getOperand(0)))
+ return false;
+ NBits = M0->getOperand(1);
+ return true;
+ };
+
+ // b) x & ~(-1 << nbits)
+ auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+ // Match `~()`. Must only have one use!
+ if (!isBitwiseNot(Mask) || !checkOneUse(Mask))
+ return false;
+ // Match `-1 << nbits`. Must only have one use!
+ SDValue M0 = Mask->getOperand(0);
+ if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
+ return false;
+ if (!isAllOnesConstant(M0->getOperand(0)))
+ return false;
+ NBits = M0->getOperand(1);
+ return true;
+ };
+
+ // Match potentially-truncated (bitwidth - y)
+ auto matchShiftAmt = [checkOneUse, Size, &NBits](SDValue ShiftAmt) {
+ // Skip over a truncate of the shift amount.
+ if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
+ ShiftAmt = ShiftAmt.getOperand(0);
+ // The trunc should have been the only user of the real shift amount.
+ if (!checkOneUse(ShiftAmt))
+ return false;
+ }
+ // Match the shift amount as: (bitwidth - y). It should go away, too.
+ if (ShiftAmt.getOpcode() != ISD::SUB)
+ return false;
+ auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
+ if (!V0 || V0->getZExtValue() != Size)
+ return false;
+ NBits = ShiftAmt.getOperand(1);
+ return true;
+ };
+
+ // c) x & (-1 >> (32 - y))
+ auto matchPatternC = [&checkOneUse, matchShiftAmt](SDValue Mask) -> bool {
+ // Match `l>>`. Must only have one use!
+ if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
+ return false;
+ // We should be shifting all-ones constant.
+ if (!isAllOnesConstant(Mask.getOperand(0)))
+ return false;
+ SDValue M1 = Mask.getOperand(1);
+ // The shift amount should not be used externally.
+ if (!checkOneUse(M1))
+ return false;
+ return matchShiftAmt(M1);
+ };
+
+ SDValue X;
+
+ // d) x << (32 - y) >> (32 - y)
+ auto matchPatternD = [&checkOneUse, &checkTwoUse, matchShiftAmt,
+ &X](SDNode *Node) -> bool {
+ if (Node->getOpcode() != ISD::SRL)
+ return false;
+ SDValue N0 = Node->getOperand(0);
+ if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
+ return false;
+ SDValue N1 = Node->getOperand(1);
+ SDValue N01 = N0->getOperand(1);
+ // Both of the shifts must be by the exact same value.
+ // There should not be any uses of the shift amount outside of the pattern.
+ if (N1 != N01 || !checkTwoUse(N1))
+ return false;
+ if (!matchShiftAmt(N1))
+ return false;
+ X = N0->getOperand(0);
+ return true;
+ };
+
+ auto matchLowBitMask = [&matchPatternA, &matchPatternB,
+ &matchPatternC](SDValue Mask) -> bool {
+ // FIXME: pattern c.
+ return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
+ };
+
+ if (Node->getOpcode() == ISD::AND) {
+ X = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+
+ if (matchLowBitMask(Mask)) {
+ // Great.
+ } else {
+ std::swap(X, Mask);
+ if (!matchLowBitMask(Mask))
+ return false;
+ }
+ } else if (!matchPatternD(Node))
+ return false;
+
+ SDLoc DL(Node);
+
+ // If we do *NOT* have BMI2, check whether 'X' is *logically* shifted
+ // (potentially with a one-use trunc in between), and if so look past the
+ // one-use truncation.
+ MVT XVT = NVT;
+ if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE &&
+ X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) {
+ assert(NVT == MVT::i32 && "Expected target valuetype to be i32");
+ X = X.getOperand(0);
+ XVT = X.getSimpleValueType();
+ assert(XVT == MVT::i64 && "Expected truncation from i64");
+ }
+
+ SDValue OrigNBits = NBits;
+ if (NBits.getValueType() != XVT) {
+ // Truncate the shift amount.
+ NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
+ insertDAGNode(*CurDAG, OrigNBits, NBits);
+
+ // Insert 8-bit NBits into lowest 8 bits of XVT-sized (32 or 64-bit)
+ // register. All the other bits are undefined; we do not care about them.
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, XVT), 0);
+ insertDAGNode(*CurDAG, OrigNBits, ImplDef);
+ NBits =
+ CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, XVT, ImplDef, NBits);
+ insertDAGNode(*CurDAG, OrigNBits, NBits);
+ }
+
+ if (Subtarget->hasBMI2()) {
+ // Great, just emit the BZHI.
+ SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits);
+ ReplaceNode(Node, Extract.getNode());
+ SelectCode(Extract.getNode());
+ return true;
+ }
+
+ // Else, emitting BEXTR requires one more step.
+ // The 'control' of BEXTR has the pattern of:
+ // [15...8 bit][ 7...0 bit] location
+ // [ bit count][ shift] name
+ // I.e. 0b00000010'00000001 means (x >> 0b1) & 0b11
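+ // Likewise, extracting 5 bits starting at bit 3 uses the control value
+ // 0b00000101'00000011 (0x0503).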
+
+ // Shift NBits left by 8 bits, thus producing 'control'.
+ // This makes the low 8 bits zero.
+ SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
+ SDValue Control = CurDAG->getNode(ISD::SHL, DL, XVT, NBits, C8);
+ insertDAGNode(*CurDAG, OrigNBits, Control);
+
+ // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
+ if (X.getOpcode() == ISD::SRL) {
+ SDValue ShiftAmt = X.getOperand(1);
+ X = X.getOperand(0);
+
+ assert(ShiftAmt.getValueType() == MVT::i8 &&
+ "Expected shift amount to be i8");
+
+ // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
+ SDValue OrigShiftAmt = ShiftAmt;
+ ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, XVT, ShiftAmt);
+ insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
+
+ // And now 'or' these low 8 bits of shift amount into the 'control'.
+ Control = CurDAG->getNode(ISD::OR, DL, XVT, Control, ShiftAmt);
+ insertDAGNode(*CurDAG, OrigNBits, Control);
+ }
+
+ // And finally, form the BEXTR itself.
+ SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
+
+ // 'X' was originally truncated; truncate the BEXTR result back now.
+ if (XVT != NVT) {
+ insertDAGNode(*CurDAG, OrigNBits, Extract);
+ Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
+ }
+
+ ReplaceNode(Node, Extract.getNode());
+ SelectCode(Extract.getNode());
+
+ return true;
+}
+
// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
-bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
+MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
SDLoc dl(Node);
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
- return false;
+ // If we have TBM we can use an immediate for the control. If we have BMI,
+ // we should only do this if the BEXTR instruction is implemented well.
+ // Otherwise, moving the control into a register makes this more costly.
+ // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
+ // hoisting the move immediate would make it worthwhile with a less optimal
+ // BEXTR?
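+ // For example, (x >> 4) & 0xFF becomes BEXTR with control 0x0804 (length 8,
+ // start 4), encoded either as a BEXTRI immediate (TBM) or moved into a
+ // register first (BMI).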
+ if (!Subtarget->hasTBM() &&
+ !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
+ return nullptr;
// Must have a shift right.
if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
- return false;
+ return nullptr;
// Shift can't have additional users.
if (!N0->hasOneUse())
- return false;
+ return nullptr;
// Only supported for 32 and 64 bits.
if (NVT != MVT::i32 && NVT != MVT::i64)
- return false;
+ return nullptr;
// Shift amount and RHS of and must be constant.
ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (!MaskCst || !ShiftCst)
- return false;
+ return nullptr;
// And RHS must be a mask.
uint64_t Mask = MaskCst->getZExtValue();
if (!isMask_64(Mask))
- return false;
+ return nullptr;
uint64_t Shift = ShiftCst->getZExtValue();
uint64_t MaskSize = countPopulation(Mask);
@@ -2594,20 +3074,41 @@ bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
// Don't interfere with something that can be handled by extracting AH.
// TODO: If we are able to fold a load, BEXTR might still be better than AH.
if (Shift == 8 && MaskSize == 8)
- return false;
+ return nullptr;
// Make sure we are only using bits that were in the original value, not
// shifted in.
if (Shift + MaskSize > NVT.getSizeInBits())
- return false;
+ return nullptr;
+
+ SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
+ unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
+ unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
+
+ // BMI requires the immediate to be placed in a register.
+ if (!Subtarget->hasTBM()) {
+ ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
+ MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+ unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
+ New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
+ }
- // Create a BEXTR node and run it through selection.
- SDValue C = CurDAG->getConstant(Shift | (MaskSize << 8), dl, NVT);
- SDValue New = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
- N0->getOperand(0), C);
- ReplaceNode(Node, New.getNode());
- SelectCode(New.getNode());
- return true;
+ MachineSDNode *NewNode;
+ SDValue Input = N0->getOperand(0);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
+ NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ // Update the chain.
+ ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
+ } else {
+ NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
+ }
+
+ return NewNode;
}
// Emit a PCMISTR(I/M) instruction.
@@ -2620,23 +3121,17 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
- // If there is a load, it will be behind a bitcast. We don't need to check
- // alignment on this load.
+ // Try to fold a load. No need to check alignment.
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (MayFoldLoad && N1->getOpcode() == ISD::BITCAST && N1->hasOneUse() &&
- tryFoldVecLoad(Node, N1.getNode(), N1.getOperand(0), Tmp0, Tmp1, Tmp2,
- Tmp3, Tmp4)) {
- SDValue Load = N1.getOperand(0);
+ if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
- Load.getOperand(0) };
+ N1.getOperand(0) };
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Update the chain.
- ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(Load)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
return CNode;
}
@@ -2659,24 +3154,18 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
- // If there is a load, it will be behind a bitcast. We don't need to check
- // alignment on this load.
+ // Try to fold a load. No need to check alignment.
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (MayFoldLoad && N2->getOpcode() == ISD::BITCAST && N2->hasOneUse() &&
- tryFoldVecLoad(Node, N2.getNode(), N2.getOperand(0), Tmp0, Tmp1, Tmp2,
- Tmp3, Tmp4)) {
- SDValue Load = N2.getOperand(0);
+ if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
- Load.getOperand(0), InFlag };
+ N2.getOperand(0), InFlag };
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
InFlag = SDValue(CNode, 3);
// Update the chain.
- ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
+ ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(Load)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
return CNode;
}
@@ -2687,6 +3176,93 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
return CNode;
}
+bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // Only handle scalar shifts.
+ if (VT.isVector())
+ return false;
+
+ // Narrower shifts only mask to 5 bits in hardware.
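+ // E.g. a 32-bit SHL only uses (amount & 31) and a 64-bit SHL only uses
+ // (amount & 63), which is what makes the rewrite below legal.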
+ unsigned Size = VT == MVT::i64 ? 64 : 32;
+
+ SDValue OrigShiftAmt = N->getOperand(1);
+ SDValue ShiftAmt = OrigShiftAmt;
+ SDLoc DL(N);
+
+ // Skip over a truncate of the shift amount.
+ if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
+ ShiftAmt = ShiftAmt->getOperand(0);
+
+ // This function is called after X86DAGToDAGISel::matchBitExtract(),
+ // so we are not afraid that we might mess up the BZHI/BEXTR pattern.
+
+ SDValue NewShiftAmt;
+ if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
+ SDValue Add0 = ShiftAmt->getOperand(0);
+ SDValue Add1 = ShiftAmt->getOperand(1);
+ // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
+ // to avoid the ADD/SUB.
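+ // E.g. an i64 shift by (X + 64) produces the same result as a shift by X,
+ // since the hardware only consumes the low 6 bits of the amount.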
+ if (isa<ConstantSDNode>(Add1) &&
+ cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
+ NewShiftAmt = Add0;
+ // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+ // generate a NEG instead of a SUB of a constant.
+ } else if (ShiftAmt->getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(Add0) &&
+ cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
+ cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
+ // Insert a negate op.
+ // TODO: This isn't guaranteed to replace the sub if it has other users
+ // that are not shifts.
+ EVT SubVT = ShiftAmt.getValueType();
+ SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
+ SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
+ NewShiftAmt = Neg;
+
+ // Insert these operands into a valid topological order so they can
+ // get selected independently.
+ insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
+ insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
+ } else
+ return false;
+ } else
+ return false;
+
+ if (NewShiftAmt.getValueType() != MVT::i8) {
+ // Need to truncate the shift amount.
+ NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
+ // Add to a correct topological ordering.
+ insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+ }
+
+ // Insert a new mask to keep the shift amount legal. This should be removed
+ // by isel patterns.
+ NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
+ CurDAG->getConstant(Size - 1, DL, MVT::i8));
+ // Place in a correct topological ordering.
+ insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+
+ SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+ NewShiftAmt);
+ if (UpdatedNode != N) {
+ // If we found an existing node, we should replace ourselves with that node
+ // and wait for it to be selected after its other users.
+ ReplaceNode(N, UpdatedNode);
+ return true;
+ }
+
+ // If the original shift amount is now dead, delete it so that we don't run
+ // it through isel.
+ if (OrigShiftAmt.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
+
+ // Now that we've optimized the shift amount, defer to normal isel to get
+ // load folding and legacy vs BMI2 selection without repeating it here.
+ SelectCode(N);
+ return true;
+}
+
/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
@@ -2795,9 +3371,18 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, getGlobalBaseReg());
return;
- case X86ISD::SELECT:
- case X86ISD::SHRUNKBLEND: {
- // SHRUNKBLEND selects like a regular VSELECT. Same with X86ISD::SELECT.
+ case ISD::BITCAST:
+ // Just drop all 128/256/512-bit bitcasts.
+ if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
+ NVT == MVT::f128) {
+ ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ break;
+
+ case X86ISD::BLENDV: {
+ // BLENDV selects like a regular VSELECT.
SDValue VSelect = CurDAG->getNode(
ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2));
@@ -2807,10 +3392,25 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
}
+ case ISD::SRL:
+ if (matchBitExtract(Node))
+ return;
+ LLVM_FALLTHROUGH;
+ case ISD::SRA:
+ case ISD::SHL:
+ if (tryShiftAmountMod(Node))
+ return;
+ break;
+
case ISD::AND:
- if (matchBEXTRFromAnd(Node))
+ if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ if (matchBitExtract(Node))
return;
- if (shrinkAndImmediate(Node))
+ if (AndImmShrink && shrinkAndImmediate(Node))
return;
LLVM_FALLTHROUGH;
@@ -2898,45 +3498,85 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
getI8Imm(ShlVal, dl));
return;
}
- case X86ISD::UMUL8:
- case X86ISD::SMUL8: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
-
- unsigned Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
-
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
- N0, SDValue()).getValue(1);
-
- SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
- SDValue Ops[] = {N1, InFlag};
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
-
- ReplaceNode(Node, CNode);
- return;
- }
-
+ case X86ISD::SMUL:
+ // i16/i32/i64 are handled with isel patterns.
+ if (NVT != MVT::i8)
+ break;
+ LLVM_FALLTHROUGH;
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- unsigned LoReg, Opc;
+ unsigned LoReg, ROpc, MOpc;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
- // MVT::i8 is handled by X86ISD::UMUL8.
- case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
- case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
- case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ case MVT::i8:
+ LoReg = X86::AL;
+ ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
+ MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX;
+ ROpc = X86::MUL16r;
+ MOpc = X86::MUL16m;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX;
+ ROpc = X86::MUL32r;
+ MOpc = X86::MUL32m;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX;
+ ROpc = X86::MUL64r;
+ MOpc = X86::MUL64m;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commutative.
+ if (!FoldedLoad) {
+ FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (FoldedLoad)
+ std::swap(N0, N1);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
- SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
- SDValue Ops[] = {N1, InFlag};
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ MachineSDNode *CNode;
+ if (FoldedLoad) {
+ // i16/i32/i64 use an instruction that produces a low and high result even
+ // though only the low result is used.
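+ // (E.g. MUL32r defines EDX:EAX; the high half in EDX is simply unused.)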
+ SDVTList VTs;
+ if (NVT == MVT::i8)
+ VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
+ else
+ VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
+
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
+ } else {
+ // i16/i32/i64 use an instruction that produces a low and high result even
+ // though only the low result is used.
+ SDVTList VTs;
+ if (NVT == MVT::i8)
+ VTs = CurDAG->getVTList(NVT, MVT::i32);
+ else
+ VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+
+ CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
+ }
- ReplaceNode(Node, CNode);
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
+ CurDAG->RemoveDeadNode(Node);
return;
}
@@ -2947,14 +3587,11 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
unsigned Opc, MOpc;
bool isSigned = Opcode == ISD::SMUL_LOHI;
- bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
- case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
- MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
- case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
- MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
}
} else {
switch (NVT.SimpleTy) {
@@ -2975,12 +3612,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case X86::MUL64r:
SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
break;
- case X86::MULX32rr:
- SrcReg = X86::EDX; LoReg = HiReg = 0;
- break;
- case X86::MULX64rr:
- SrcReg = X86::RDX; LoReg = HiReg = 0;
- break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2994,68 +3625,43 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
N0, SDValue()).getValue(1);
- SDValue ResHi, ResLo;
-
if (foldedLoad) {
SDValue Chain;
MachineSDNode *CNode = nullptr;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
- if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
- SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
- CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
- ResHi = SDValue(CNode, 0);
- ResLo = SDValue(CNode, 1);
- Chain = SDValue(CNode, 2);
- InFlag = SDValue(CNode, 3);
- } else {
- SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
- CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
- Chain = SDValue(CNode, 0);
- InFlag = SDValue(CNode, 1);
- }
+ SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ Chain = SDValue(CNode, 0);
+ InFlag = SDValue(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), Chain);
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
SDValue Ops[] = { N1, InFlag };
- if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
- SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
- ResHi = SDValue(CNode, 0);
- ResLo = SDValue(CNode, 1);
- InFlag = SDValue(CNode, 2);
- } else {
- SDVTList VTs = CurDAG->getVTList(MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
- InFlag = SDValue(CNode, 0);
- }
+ SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ InFlag = SDValue(CNode, 0);
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- if (!ResLo.getNode()) {
- assert(LoReg && "Register for low half is not defined!");
- ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
- InFlag);
- InFlag = ResLo.getValue(2);
- }
+ assert(LoReg && "Register for low half is not defined!");
+ SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
+ NVT, InFlag);
+ InFlag = ResLo.getValue(2);
ReplaceUses(SDValue(Node, 0), ResLo);
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- if (!ResHi.getNode()) {
- assert(HiReg && "Register for high half is not defined!");
- ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
- InFlag);
- InFlag = ResHi.getValue(2);
- }
+ assert(HiReg && "Register for high half is not defined!");
+ SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
+ NVT, InFlag);
+ InFlag = ResHi.getValue(2);
ReplaceUses(SDValue(Node, 1), ResHi);
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
dbgs() << '\n');
@@ -3066,15 +3672,12 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}
case ISD::SDIVREM:
- case ISD::UDIVREM:
- case X86ISD::SDIVREM8_SEXT_HREG:
- case X86ISD::UDIVREM8_ZEXT_HREG: {
+ case ISD::UDIVREM: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
unsigned Opc, MOpc;
- bool isSigned = (Opcode == ISD::SDIVREM ||
- Opcode == X86ISD::SDIVREM8_SEXT_HREG);
+ bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
@@ -3124,20 +3727,22 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
+ MachineSDNode *Move;
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
- Move =
- SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
- MVT::Other, Ops), 0);
- Chain = Move.getValue(1);
+ Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
+ MVT::Other, Ops);
+ Chain = SDValue(Move, 1);
ReplaceUses(N0.getValue(1), Chain);
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
} else {
- Move =
- SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
+ Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
Chain = CurDAG->getEntryNode();
}
- Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
+ SDValue());
InFlag = Chain.getValue(1);
} else {
InFlag =
@@ -3188,9 +3793,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
InFlag =
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
@@ -3213,13 +3816,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Result(RNode, 0);
InFlag = SDValue(RNode, 1);
- if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
- Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
- assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!");
- } else {
- Result =
- CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
- }
+ Result =
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
+
ReplaceUses(SDValue(Node, 1), Result);
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
dbgs() << '\n');
@@ -3250,8 +3849,31 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
- hasNoSignedComparisonUses(Node))
+ // Optimizations for TEST compares.
+ if (!isNullConstant(N1))
+ break;
+
+ // Save the original VT of the compare.
+ MVT CmpVT = N0.getSimpleValueType();
+
+ // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
+ // by a test instruction. The test should be removed later by
+ // analyzeCompare if we are using only the zero flag.
+ // TODO: Should we check the users and use the BEXTR flags directly?
+ if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
+ unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
+ : X86::TEST32rr;
+ SDValue BEXTR = SDValue(NewNode, 0);
+ NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ }
+
+ // We can peek through truncates, but we need to be careful below.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
N0 = N0.getOperand(0);
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
@@ -3259,33 +3881,75 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Look past the truncate if CMP is the only use of it.
if (N0.getOpcode() == ISD::AND &&
N0.getNode()->hasOneUse() &&
- N0.getValueType() != MVT::i8 &&
- X86::isZeroNode(N1)) {
+ N0.getValueType() != MVT::i8) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C) break;
uint64_t Mask = C->getZExtValue();
+ // Check if we can replace AND+IMM64 with a shift. This is possible for
+ // masks like 0xFF000000 or 0x00FFFFFF, provided we only care about the
+ // zero flag.
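+ // E.g. when only ZF is consumed, (X & 0xFFFFFFFF00000000) == 0 can be
+ // checked as "shrq $32, X; testq X, X", avoiding a 64-bit immediate.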
+ if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
+ onlyUsesZeroFlag(SDValue(Node, 0))) {
+ if (isMask_64(~Mask)) {
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+ SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
+ SDValue Shift =
+ SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64,
+ N0.getOperand(0), Imm), 0);
+ MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
+ MVT::i32, Shift, Shift);
+ ReplaceNode(Node, Test);
+ return;
+ }
+ if (isMask_64(Mask)) {
+ unsigned LeadingZeros = countLeadingZeros(Mask);
+ SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
+ SDValue Shift =
+ SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64,
+ N0.getOperand(0), Imm), 0);
+ MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
+ MVT::i32, Shift, Shift);
+ ReplaceNode(Node, Test);
+ return;
+ }
+ }
+
MVT VT;
int SubRegOp;
- unsigned Op;
+ unsigned ROpc, MOpc;
+
+ // For each of these checks we need to be careful if the sign flag is
+ // being used. It is only safe to use the sign flag in two conditions:
+ // either the sign bit in the shrunken mask is zero, or the final test
+ // size is equal to the original compare size.
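+ // E.g. shrinking "testl %eax, $0x80" to "testb %al, $0x80" changes what SF
+ // reports (bit 7 of AL instead of a guaranteed 0), hence the guards below.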
if (isUInt<8>(Mask) &&
- (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
+ (!(Mask & 0x80) || CmpVT == MVT::i8 ||
+ hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, convert "testl %eax, $8" to "testb %al, $8"
VT = MVT::i8;
SubRegOp = X86::sub_8bit;
- Op = X86::TEST8ri;
+ ROpc = X86::TEST8ri;
+ MOpc = X86::TEST8mi;
} else if (OptForMinSize && isUInt<16>(Mask) &&
- (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
+ (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
+ hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, "testl %eax, $32776" to "testw %ax, $32776".
// NOTE: We only want to form TESTW instructions if optimizing for
// min size. Otherwise we only save one byte and possibly get a length
// changing prefix penalty in the decoders.
VT = MVT::i16;
SubRegOp = X86::sub_16bit;
- Op = X86::TEST16ri;
+ ROpc = X86::TEST16ri;
+ MOpc = X86::TEST16mi;
} else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
- (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
+ ((!(Mask & 0x80000000) &&
+ // Without minsize, 16-bit Cmps can get here, so we need to
+ // be sure we calculate the correct sign flag if needed.
+ (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
+ CmpVT == MVT::i32 ||
+ hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
// NOTE: We only want to run that transform if N0 is 32 or 64 bits.
// Otherwise, we find ourselves in a position where we have to do
@@ -3293,21 +3957,37 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// they had a good reason not to and do not promote here.
VT = MVT::i32;
SubRegOp = X86::sub_32bit;
- Op = X86::TEST32ri;
+ ROpc = X86::TEST32ri;
+ MOpc = X86::TEST32mi;
} else {
// No eligible transformation was found.
break;
}
+ // FIXME: We should be able to fold loads here.
+
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
SDValue Reg = N0.getOperand(0);
- // Extract the subregister if necessary.
- if (N0.getValueType() != VT)
- Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
-
// Emit a testl or testw.
- SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
+ MachineSDNode *NewNode;
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
+ Reg.getOperand(0) };
+ NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
+ // Update the chain.
+ ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(NewNode,
+ {cast<LoadSDNode>(Reg)->getMemOperand()});
+ } else {
+ // Extract the subregister if necessary.
+ if (N0.getValueType() != VT)
+ Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
+
+ NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
+ }
// Replace CMP with TEST.
ReplaceNode(Node, NewNode);
return;
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 67a127fe0a2b..b6a692ee187d 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19,7 +19,6 @@
#include "X86InstrBuilder.h"
#include "X86IntrinsicsInfo.h"
#include "X86MachineFunctionInfo.h"
-#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -196,6 +195,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
+ // Funnel shifts.
+ for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
+ setOperationAction(ShiftOp , MVT::i16 , Custom);
+ setOperationAction(ShiftOp , MVT::i32 , Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ShiftOp , MVT::i64 , Custom);
+ }
+
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
@@ -533,6 +540,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Use ANDPD and ORPD to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ // These might be better off as horizontal vector ops.
+ setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::FSUB, VT, Custom);
+
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
@@ -543,15 +554,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
- // Expand FP immediates into loads from the stack, except for the special
- // cases we handle.
- addLegalFPImmediate(APFloat(+0.0)); // xorpd
- addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (UseX87 && X86ScalarSSEf32) {
+ } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
- addRegisterClass(MVT::f64, &X86::RFP64RegClass);
+ if (UseX87)
+ addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
@@ -559,10 +567,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f32, Custom);
- setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ if (UseX87)
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
// Use ANDPS and ORPS to simulate FCOPYSIGN.
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ if (UseX87)
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
@@ -570,17 +580,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
- // Special cases we handle for FP constants.
- addLegalFPImmediate(APFloat(+0.0f)); // xorps
- addLegalFPImmediate(APFloat(+0.0)); // FLD0
- addLegalFPImmediate(APFloat(+1.0)); // FLD1
- addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
-
- // Always expand sin/cos functions even though x87 has an instruction.
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ if (UseX87) {
+ // Always expand sin/cos functions even though x87 has an instruction.
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ }
} else if (UseX87) {
// f32 and f64 in x87.
// Set up the FP register classes.
@@ -596,14 +601,27 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
- addLegalFPImmediate(APFloat(+0.0)); // FLD0
- addLegalFPImmediate(APFloat(+1.0)); // FLD1
- addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- addLegalFPImmediate(APFloat(+0.0f)); // FLD0
- addLegalFPImmediate(APFloat(+1.0f)); // FLD1
- addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ }
+
+ // Expand FP32 immediates into loads from the stack, save special cases.
+ if (isTypeLegal(MVT::f32)) {
+ if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ } else // SSE immediates.
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ }
+ // Expand FP64 immediates into loads from the stack, save special cases.
+ if (isTypeLegal(MVT::f64)) {
+ if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ } else // SSE immediates.
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
// We don't support FMA.
@@ -613,7 +631,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Long double always uses X87, except f128 in MMX.
if (UseX87) {
if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
- addRegisterClass(MVT::f128, &X86::VR128RegClass);
+ addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
setOperationAction(ISD::FABS , MVT::f128, Custom);
setOperationAction(ISD::FNEG , MVT::f128, Custom);
@@ -778,11 +797,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
+ for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
+ MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::SREM, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ setOperationAction(ISD::UREM, VT, Custom);
+ }
+
+ setOperationAction(ISD::MUL, MVT::v2i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i8, Custom);
+
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
@@ -799,6 +833,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
}
+ setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
+
+ if (!ExperimentalVectorWideningLegalization) {
+ // Use widening instead of promotion.
+ for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
+ MVT::v4i16, MVT::v2i16 }) {
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
+ }
+ }
+
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
@@ -813,7 +867,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
+ setOperationAction(ISD::ABS, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -834,9 +888,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// scalars) and extend in-register to a legal 128-bit vector type. For sext
// loads these must work with a single scalar load.
for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
@@ -857,21 +908,36 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
- // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
- setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
- }
-
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
+
+ // Custom legalize these to avoid over promotion or custom promotion.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+
+ // Marking FP_TO_SINT v8i16 as Custom will trick type legalization into
+ // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
+ // split again based on the input type, this will cause an AssertSExt i16 to
+ // be emitted instead of an AssertZExt. This will allow packssdw followed by
+ // packuswb to be used to truncate to v8i8. This is necessary since packusdw
+ // isn't available until sse4.1.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
@@ -887,6 +953,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
+ // We want to legalize this to an f64 load rather than an i64 load on
+ // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
+ // store.
+ setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v8i8, Custom);
+
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
@@ -897,6 +975,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
+ if (ExperimentalVectorWideningLegalization) {
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+ } else {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
+ }
+
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
@@ -907,7 +998,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
- setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
+
+ // With AVX512, expanding (and promoting the shifts) is better.
+ if (!Subtarget.hasAVX512())
+ setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -919,6 +1013,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
+
+ // These might be better off as horizontal vector ops.
+ setOperationAction(ISD::ADD, MVT::i16, Custom);
+ setOperationAction(ISD::ADD, MVT::i32, Custom);
+ setOperationAction(ISD::SUB, MVT::i16, Custom);
+ setOperationAction(ISD::SUB, MVT::i32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
@@ -953,17 +1053,22 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
- for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+ if (!ExperimentalVectorWideningLegalization) {
+ // Avoid narrow result types when widening. The legal types are listed
+ // in the next loop.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+ }
}
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
+ if (!ExperimentalVectorWideningLegalization)
+ setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
@@ -1039,12 +1144,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRA, VT, Custom);
}
+ if (ExperimentalVectorWideningLegalization) {
+ // These types need custom splitting if their input is a 128-bit vector.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ }
+
setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
- setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
+
+ // With BWI, expanding (and promoting the shifts) is better.
+ if (!Subtarget.hasBWI())
+ setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
@@ -1061,9 +1180,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
+ // TODO - remove this once 256-bit X86ISD::ANDNP is correctly split.
+ setOperationAction(ISD::CTTZ, VT, HasInt256 ? Expand : Custom);
+
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
@@ -1086,19 +1207,28 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
- setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
-
+ setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
+ setOperationAction(ISD::ABS, MVT::v4i64, Custom);
setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
@@ -1107,11 +1237,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
}
- if (HasInt256) {
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
+ for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+ }
+ if (HasInt256) {
// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
@@ -1156,15 +1287,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (HasInt256)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
- // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
- setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
- }
-
if (HasInt256) {
// Custom legalize 2x32 to get a little better code.
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
@@ -1224,6 +1346,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -1307,6 +1433,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ if (ExperimentalVectorWideningLegalization) {
+ // Need to custom widen this if we don't have AVX512BW.
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
+ }
+
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
@@ -1315,12 +1448,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, VT, Legal);
}
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
-
// Without BWI we need to use custom lowering to handle MVT::v64i8 input.
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
+ for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+ }
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
@@ -1330,11 +1462,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
- setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
@@ -1347,7 +1482,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1358,13 +1492,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
- // Need to promote to 64-bit even though we have 32-bit masked instructions
- // because the IR optimizers rearrange bitcasts around logic ops leaving
- // too many variations to handle if we don't promote them.
- setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
- setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
- setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
-
if (Subtarget.hasDQI()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
@@ -1378,7 +1505,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
setOperationAction(ISD::CTLZ, VT, Legal);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
} // Subtarget.hasCDI()
@@ -1407,16 +1533,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
- for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
- setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
- setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
- }
-
// Need to custom split v32i16/v64i8 bitcasts.
if (!Subtarget.hasBWI()) {
setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
}
+
+ if (Subtarget.hasVBMI2()) {
+ for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
+ }
+ }
}// has AVX-512
// This block controls legalization for operations that don't have
@@ -1468,7 +1596,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasCDI()) {
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::CTLZ, VT, Legal);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
} // Subtarget.hasCDI()
@@ -1490,6 +1617,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1550,6 +1681,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
@@ -1563,17 +1695,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::SADDSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
- setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
- setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
- setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
+ // The condition codes aren't legal in SSE/AVX and under AVX512 we use
+ // setcc all the way to isel and prefer SETGT in some isel patterns.
+ setCondCodeAction(ISD::SETLT, VT, Custom);
+ setCondCodeAction(ISD::SETLE, VT, Custom);
}
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
@@ -1584,6 +1720,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v64i8, MVT::v32i16 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
+
+ if (Subtarget.hasVBMI2()) {
+ setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
+ setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
+ }
}
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
@@ -1630,6 +1771,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
+
+ if (Subtarget.hasVBMI2()) {
+ // TODO: Make these legal even without VLX?
+ for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
+ MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
+ }
+ }
}
// We want to custom lower some of our intrinsics.
@@ -1731,8 +1881,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
- setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -1787,13 +1935,13 @@ SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
}
TargetLoweringBase::LegalizeTypeAction
-X86TargetLowering::getPreferredVectorAction(EVT VT) const {
+X86TargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return TypeSplitVector;
if (ExperimentalVectorWideningLegalization &&
VT.getVectorNumElements() != 1 &&
- VT.getVectorElementType().getSimpleVT() != MVT::i1)
+ VT.getVectorElementType() != MVT::i1)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
@@ -1926,7 +2074,8 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
if (Subtarget.hasSSE2())
return MVT::v16i8;
// TODO: Can SSE1 handle a byte vector?
- if (Subtarget.hasSSE1())
+ // If we have SSE1 registers we should be able to use them.
+ if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()))
return MVT::v4f32;
} else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
!Subtarget.is64Bit() && Subtarget.hasSSE2()) {
@@ -3138,7 +3287,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
}
// If value is passed via pointer - do a load.
- if (VA.getLocInfo() == CCValAssign::Indirect)
+ if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
ArgValue =
DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
@@ -3621,13 +3770,29 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg = DAG.getBitcast(RegVT, Arg);
break;
case CCValAssign::Indirect: {
- // Store the argument.
- SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
- int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- Chain = DAG.getStore(
- Chain, dl, Arg, SpillSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- Arg = SpillSlot;
+ if (isByVal) {
+ // Memcpy the argument to a temporary stack slot to prevent
+ // the caller from seeing any modifications the callee may make
+ // as guaranteed by the `byval` attribute.
+ int FrameIdx = MF.getFrameInfo().CreateStackObject(
+ Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
+ false);
+ SDValue StackSlot =
+ DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
+ Chain =
+ CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
+ // From now on, treat this as a regular pointer.
+ Arg = StackSlot;
+ isByVal = false;
+ } else {
+ // Store the argument.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Chain = DAG.getStore(
+ Chain, dl, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ Arg = SpillSlot;
+ }
break;
}
}
@@ -4405,6 +4570,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
case X86ISD::VPERMV3:
return true;
// 'Faux' Target Shuffles.
+ case ISD::OR:
case ISD::AND:
case X86ISD::ANDNP:
return true;
@@ -4686,6 +4852,14 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+ // If we are using XMM registers in the ABI and the condition of the select is
+ // a floating-point compare and we have blendv or conditional move, then it is
+ // cheaper to select instead of doing a cross-register move and creating a
+ // load that depends on the compare result.
+ return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
+}
+
bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
// TODO: It might be a win to ease or lift this restriction, but the generic
// folds in DAGCombiner conflict with vector folds for an AVX512 target.
@@ -4695,6 +4869,31 @@ bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
return true;
}
+bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
+ // TODO: We handle scalars using custom code, but generic combining could make
+ // that unnecessary.
+ APInt MulC;
+ if (!ISD::isConstantSplatVector(C.getNode(), MulC))
+ return false;
+
+ // If vector multiply is legal, assume that's faster than shl + add/sub.
+ // TODO: Multiply is a complex op with higher latency and lower throughput in
+ // most implementations, so this check could be loosened based on type
+ // and/or a CPU attribute.
+ if (isOperationLegal(ISD::MUL, VT))
+ return false;
+
+ // shl+add, shl+sub, shl+add+neg
+ return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
+ (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
+}
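A minimal scalar sketch of the identities behind the power-of-two checks above; the names (mulByConstViaShift, IsPow2, Log2, Shl) are invented for illustration and are not LLVM APIs.

#include <cstdint>

// Scalar analogue of the splat constants accepted above: MulC of the form
// 2^k+1, 2^k-1, 1-2^k or -(2^k+1) needs only a shift plus add/sub (and
// possibly a negate) instead of a multiply.
static int64_t mulByConstViaShift(int64_t X, int64_t MulC) {
  auto IsPow2 = [](int64_t V) { return V > 0 && (V & (V - 1)) == 0; };
  auto Log2 = [](int64_t V) { int K = 0; while (V >>= 1) ++K; return K; };
  auto Shl = [](int64_t V, int K) { return (int64_t)((uint64_t)V << K); };
  if (IsPow2(MulC - 1))    return Shl(X, Log2(MulC - 1)) + X;       // shl+add
  if (IsPow2(MulC + 1))    return Shl(X, Log2(MulC + 1)) - X;       // shl+sub
  if (IsPow2(1 - MulC))    return X - Shl(X, Log2(1 - MulC));       // shl+sub
  if (IsPow2(-(MulC + 1))) return -(Shl(X, Log2(-(MulC + 1))) + X); // shl+add+neg
  return X * MulC; // anything else keeps the multiply
}

For example, X * 9 becomes (X << 3) + X, and X * -9 becomes -((X << 3) + X).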
+
+bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+ bool IsSigned) const {
+ // f80 UINT_TO_FP is more efficient using Strict code if FCMOV is available.
+ return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
+}
+
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
@@ -4709,6 +4908,18 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
return (Index % ResVT.getVectorNumElements()) == 0;
}
+bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+ // If the vector op is not supported, try to convert to scalar.
+ EVT VecVT = VecOp.getValueType();
+ if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
+ return true;
+
+ // If the vector op is supported, but the scalar op is not, the transform may
+ // not be worthwhile.
+ EVT ScalarVT = VecVT.getScalarType();
+ return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
+}
+
bool X86TargetLowering::isCheapToSpeculateCttz() const {
// Speculate cttz only if we can directly use TZCNT.
return Subtarget.hasBMI();
@@ -4721,7 +4932,11 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
EVT BitcastVT) const {
- if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1)
+ if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
+ BitcastVT.getVectorElementType() == MVT::i1)
+ return false;
+
+ if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
return false;
return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
@@ -4763,17 +4978,14 @@ bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
if (VT != MVT::i32 && VT != MVT::i64)
return false;
- // A mask and compare against constant is ok for an 'andn' too
- // even though the BMI instruction doesn't have an immediate form.
-
- return true;
+ return !isa<ConstantSDNode>(Y);
}
bool X86TargetLowering::hasAndNot(SDValue Y) const {
EVT VT = Y.getValueType();
- if (!VT.isVector()) // x86 can't form 'andn' with an immediate.
- return !isa<ConstantSDNode>(Y) && hasAndNotCompare(Y);
+ if (!VT.isVector())
+ return hasAndNotCompare(Y);
// Vector.
@@ -4800,6 +5012,12 @@ bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
return true;
}
+bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
+ // Any legal vector type can be splatted more efficiently than
+ // loading/spilling from memory.
+ return isTypeLegal(VT);
+}
+
MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
MVT VT = MVT::getIntegerVT(NumBits);
if (isTypeLegal(VT))
@@ -5408,24 +5626,29 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
return DAG.getBitcast(VT, Vec);
}
-static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
+static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
SelectionDAG &DAG) {
EVT InVT = In.getValueType();
- assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode");
-
- if (VT.is128BitVector() && InVT.is128BitVector())
- return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
- : DAG.getZeroExtendVectorInReg(In, DL, VT);
+ assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
// For 256-bit vectors, we only need the lower (128-bit) input half.
// For 512-bit vectors, we only need the lower input half or quarter.
- if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
- int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
+ if (InVT.getSizeInBits() > 128) {
+ assert(VT.getSizeInBits() == InVT.getSizeInBits() &&
+ "Expected VTs to be the same size!");
+ unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
In = extractSubVector(In, 0, DAG, DL,
- std::max(128, (int)VT.getSizeInBits() / Scale));
+ std::max(128U, VT.getSizeInBits() / Scale));
+ InVT = In.getValueType();
}
- return DAG.getNode(Opc, DL, VT, In);
+ if (VT.getVectorNumElements() == InVT.getVectorNumElements())
+ return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ DL, VT, In);
+
+ return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG,
+ DL, VT, In);
}
/// Returns a vector_shuffle node for an unpackl operation.
@@ -5463,19 +5686,6 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
-static SDValue peekThroughBitcasts(SDValue V) {
- while (V.getNode() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- return V;
-}
-
-static SDValue peekThroughOneUseBitcasts(SDValue V) {
- while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
- V.getOperand(0).hasOneUse())
- V = V.getOperand(0);
- return V;
-}
-
// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
@@ -5496,10 +5706,10 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
Ptr = Ptr->getOperand(0);
auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
- if (!CNode || CNode->isMachineConstantPoolEntry())
+ if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
return nullptr;
- return dyn_cast<Constant>(CNode->getConstVal());
+ return CNode->getConstVal();
}
// Extract raw constant bits from constant pools.
@@ -5632,15 +5842,34 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
return CastBitData(UndefSrcElts, SrcEltBits);
}
+ if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
+ unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+ APInt UndefSrcElts(NumSrcElts, 0);
+ SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ const SDValue &Src = Op.getOperand(i);
+ if (Src.isUndef()) {
+ UndefSrcElts.setBit(i);
+ continue;
+ }
+ auto *Cst = cast<ConstantFPSDNode>(Src);
+ APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
+ SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
+ }
+ return CastBitData(UndefSrcElts, SrcEltBits);
+ }
// Extract constant bits from constant pool vector.
if (auto *Cst = getTargetConstantFromNode(Op)) {
Type *CstTy = Cst->getType();
- if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
+ unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
+ if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
return false;
unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
- unsigned NumSrcElts = CstTy->getVectorNumElements();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
@@ -5685,19 +5914,107 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return CastBitData(UndefSrcElts, SrcEltBits);
}
+ // Extract constant bits from a subvector's source.
+ if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isa<ConstantSDNode>(Op.getOperand(1))) {
+ // TODO - support extract_subvector through bitcasts.
+ if (EltSizeInBits != VT.getScalarSizeInBits())
+ return false;
+
+ if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+ UndefElts, EltBits, AllowWholeUndefs,
+ AllowPartialUndefs)) {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned NumSubElts = VT.getVectorNumElements();
+ unsigned BaseIdx = Op.getConstantOperandVal(1);
+ UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
+ if ((BaseIdx + NumSubElts) != NumSrcElts)
+ EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
+ if (BaseIdx != 0)
+ EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
+ return true;
+ }
+ }
+
+ // Extract constant bits from shuffle node sources.
+ if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Op)) {
+ // TODO - support shuffle through bitcasts.
+ if (EltSizeInBits != VT.getScalarSizeInBits())
+ return false;
+
+ ArrayRef<int> Mask = SVN->getMask();
+ if ((!AllowWholeUndefs || !AllowPartialUndefs) &&
+ llvm::any_of(Mask, [](int M) { return M < 0; }))
+ return false;
+
+ APInt UndefElts0, UndefElts1;
+ SmallVector<APInt, 32> EltBits0, EltBits1;
+ if (isAnyInRange(Mask, 0, NumElts) &&
+ !getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+ UndefElts0, EltBits0, AllowWholeUndefs,
+ AllowPartialUndefs))
+ return false;
+ if (isAnyInRange(Mask, NumElts, 2 * NumElts) &&
+ !getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
+ UndefElts1, EltBits1, AllowWholeUndefs,
+ AllowPartialUndefs))
+ return false;
+
+ UndefElts = APInt::getNullValue(NumElts);
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ EltBits.push_back(APInt::getNullValue(EltSizeInBits));
+ } else if (M < (int)NumElts) {
+ if (UndefElts0[M])
+ UndefElts.setBit(i);
+ EltBits.push_back(EltBits0[M]);
+ } else {
+ if (UndefElts1[M - NumElts])
+ UndefElts.setBit(i);
+ EltBits.push_back(EltBits1[M - NumElts]);
+ }
+ }
+ return true;
+ }
+
return false;
}
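A self-contained sketch of what the shuffle case added above computes: the constant bits of the two shuffle operands are folded through the mask, and negative (sentinel) mask entries become undef/zero lanes. The ConstLane type and foldShuffleOfConstants name are invented for illustration.

#include <vector>

struct ConstLane { bool Undef; long long Bits; };

// Fold shuffle(ConstOp0, ConstOp1, Mask) into a single constant lane array.
static std::vector<ConstLane>
foldShuffleOfConstants(const std::vector<int> &Mask,
                       const std::vector<ConstLane> &Op0,
                       const std::vector<ConstLane> &Op1) {
  const int NumElts = (int)Mask.size();
  std::vector<ConstLane> Out;
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M < 0)
      Out.push_back({true, 0});            // sentinel -> undef/zero lane
    else if (M < NumElts)
      Out.push_back(Op0[M]);               // element taken from operand 0
    else
      Out.push_back(Op1[M - NumElts]);     // element taken from operand 1
  }
  return Out;
}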
-static bool getTargetShuffleMaskIndices(SDValue MaskNode,
- unsigned MaskEltSizeInBits,
- SmallVectorImpl<uint64_t> &RawMask) {
+static bool isConstantSplat(SDValue Op, APInt &SplatVal) {
APInt UndefElts;
- SmallVector<APInt, 64> EltBits;
+ SmallVector<APInt, 16> EltBits;
+ if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(),
+ UndefElts, EltBits, true, false)) {
+ int SplatIndex = -1;
+ for (int i = 0, e = EltBits.size(); i != e; ++i) {
+ if (UndefElts[i])
+ continue;
+ if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) {
+ SplatIndex = -1;
+ break;
+ }
+ SplatIndex = i;
+ }
+ if (0 <= SplatIndex) {
+ SplatVal = EltBits[SplatIndex];
+ return true;
+ }
+ }
+
+ return false;
+}
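The same splat scan restated over plain arrays, as an illustrative sketch (findSplat is an invented name): undefined lanes are skipped, and every remaining lane must carry identical bits.

#include <cstdint>
#include <vector>

static bool findSplat(const std::vector<uint64_t> &Elts,
                      const std::vector<bool> &Undef, uint64_t &SplatVal) {
  int SplatIndex = -1;
  for (int i = 0, e = (int)Elts.size(); i != e; ++i) {
    if (Undef[i])
      continue;                              // ignore undefined lanes
    if (SplatIndex >= 0 && Elts[i] != Elts[SplatIndex])
      return false;                          // two defined lanes disagree
    SplatIndex = i;
  }
  if (SplatIndex < 0)
    return false;                            // every lane was undefined
  SplatVal = Elts[SplatIndex];
  return true;
}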
+static bool getTargetShuffleMaskIndices(SDValue MaskNode,
+ unsigned MaskEltSizeInBits,
+ SmallVectorImpl<uint64_t> &RawMask,
+ APInt &UndefElts) {
// Extract the raw target constant bits.
- // FIXME: We currently don't support UNDEF bits or mask entries.
+ SmallVector<APInt, 64> EltBits;
if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
- EltBits, /* AllowWholeUndefs */ false,
+ EltBits, /* AllowWholeUndefs */ true,
/* AllowPartialUndefs */ false))
return false;
@@ -5726,6 +6043,31 @@ static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
}
}
+// Split the demanded elts of a PACKSS/PACKUS node between its operands.
+static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
+ APInt &DemandedLHS, APInt &DemandedRHS) {
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumElts = DemandedElts.getBitWidth();
+ int NumInnerElts = NumElts / 2;
+ int NumEltsPerLane = NumElts / NumLanes;
+ int NumInnerEltsPerLane = NumInnerElts / NumLanes;
+
+ DemandedLHS = APInt::getNullValue(NumInnerElts);
+ DemandedRHS = APInt::getNullValue(NumInnerElts);
+
+ // Map DemandedElts to the packed operands.
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) {
+ int OuterIdx = (Lane * NumEltsPerLane) + Elt;
+ int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt;
+ if (DemandedElts[OuterIdx])
+ DemandedLHS.setBit(InnerIdx);
+ if (DemandedElts[OuterIdx + NumInnerEltsPerLane])
+ DemandedRHS.setBit(InnerIdx);
+ }
+ }
+}
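For a concrete picture of the mapping, here is the same index arithmetic specialized to a 256-bit pack (a v32i8 result from two v16i16 inputs) using plain bitmasks; splitPackDemanded is an invented name for illustration.

#include <cstdint>
#include <utility>

// Bit i of Demanded corresponds to result byte i; the returned pair holds the
// demanded-word masks for the LHS and RHS inputs.
static std::pair<uint32_t, uint32_t> splitPackDemanded(uint32_t Demanded) {
  const int NumLanes = 2, NumElts = 32;
  const int NumInnerElts = NumElts / 2;                     // 16 words per input
  const int NumEltsPerLane = NumElts / NumLanes;            // 16 result bytes/lane
  const int NumInnerEltsPerLane = NumInnerElts / NumLanes;  // 8 input words/lane
  uint32_t LHS = 0, RHS = 0;
  for (int Lane = 0; Lane != NumLanes; ++Lane)
    for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) {
      int OuterIdx = Lane * NumEltsPerLane + Elt;
      int InnerIdx = Lane * NumInnerEltsPerLane + Elt;
      if (Demanded & (1u << OuterIdx))
        LHS |= 1u << InnerIdx;
      if (Demanded & (1u << (OuterIdx + NumInnerEltsPerLane)))
        RHS |= 1u << InnerIdx;
    }
  return {LHS, RHS};
}

Demanding result byte 20 (byte 4 of lane 1), for instance, maps to bit 12 of the LHS mask, i.e. word 12 of the first input.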
+
/// Calculates the shuffle mask corresponding to the target-specific opcode.
/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
/// operands in \p Ops, and returns true.
@@ -5737,6 +6079,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SmallVectorImpl<SDValue> &Ops,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
+ unsigned MaskEltSize = VT.getScalarSizeInBits();
+ SmallVector<uint64_t, 32> RawMask;
+ APInt RawUndefs;
SDValue ImmN;
assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
@@ -5744,26 +6089,26 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
IsUnary = false;
bool IsFakeUnary = false;
- switch(N->getOpcode()) {
+ switch (N->getOpcode()) {
case X86ISD::BLENDI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeBLENDMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUFP:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeSHUFPMask(NumElems, VT.getScalarSizeInBits(),
+ ImmN = N->getOperand(N->getNumOperands() - 1);
+ DecodeSHUFPMask(NumElems, MaskEltSize,
cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::INSERTPS:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
@@ -5773,8 +6118,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
isa<ConstantSDNode>(N->getOperand(2))) {
int BitLen = N->getConstantOperandVal(1);
int BitIdx = N->getConstantOperandVal(2);
- DecodeEXTRQIMask(NumElems, VT.getScalarSizeInBits(), BitLen, BitIdx,
- Mask);
+ DecodeEXTRQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask);
IsUnary = true;
}
break;
@@ -5785,21 +6129,20 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
isa<ConstantSDNode>(N->getOperand(3))) {
int BitLen = N->getConstantOperandVal(2);
int BitIdx = N->getConstantOperandVal(3);
- DecodeINSERTQIMask(NumElems, VT.getScalarSizeInBits(), BitLen, BitIdx,
- Mask);
+ DecodeINSERTQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
}
break;
case X86ISD::UNPCKH:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- DecodeUNPCKHMask(NumElems, VT.getScalarSizeInBits(), Mask);
+ DecodeUNPCKHMask(NumElems, MaskEltSize, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKL:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- DecodeUNPCKLMask(NumElems, VT.getScalarSizeInBits(), Mask);
+ DecodeUNPCKLMask(NumElems, MaskEltSize, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVHLPS:
@@ -5818,7 +6161,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePALIGNRMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -5844,21 +6187,21 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFMask(NumElems, VT.getScalarSizeInBits(),
+ ImmN = N->getOperand(N->getNumOperands() - 1);
+ DecodePSHUFMask(NumElems, MaskEltSize,
cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFHW:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePSHUFHWMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePSHUFLWMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = true;
@@ -5891,14 +6234,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
- DecodeVPERMILPMask(NumElems, VT.getScalarSizeInBits(), RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMILPMask(C, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -5909,20 +6247,15 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
- DecodePSHUFBMask(RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodePSHUFBMask(C, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
+ DecodePSHUFBMask(RawMask, RawUndefs, Mask);
break;
}
return false;
}
case X86ISD::VPERMI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeVPERMMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
@@ -5935,7 +6268,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::VPERM2X128:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeVPERM2X128Mask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -5943,10 +6276,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::SHUF128:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
- decodeVSHUF64x2FamilyMask(NumElems, VT.getScalarSizeInBits(),
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
- Mask);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
+ decodeVSHUF64x2FamilyMask(NumElems, MaskEltSize,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVSLDUP:
@@ -5968,19 +6300,14 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
SDValue MaskNode = N->getOperand(2);
SDValue CtrlNode = N->getOperand(3);
if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
unsigned CtrlImm = CtrlOp->getZExtValue();
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
- DecodeVPERMIL2PMask(NumElems, VT.getScalarSizeInBits(), CtrlImm,
- RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, RawUndefs,
+ Mask);
break;
}
}
@@ -5991,13 +6318,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
SDValue MaskNode = N->getOperand(2);
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
- DecodeVPPERMMask(RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPPERMMask(C, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
+ DecodeVPPERMMask(RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -6008,14 +6330,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
// Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
Ops.push_back(N->getOperand(1));
SDValue MaskNode = N->getOperand(0);
- SmallVector<uint64_t, 32> RawMask;
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
- DecodeVPERMVMask(RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMVMask(C, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMVMask(RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -6028,9 +6345,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(1);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMV3Mask(C, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMV3Mask(RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -6147,6 +6464,12 @@ static bool setTargetShuffleZeroElements(SDValue N,
return true;
}
+// Forward declaration (for getFauxShuffleMask recursive check).
+static bool resolveTargetShuffleInputs(SDValue Op,
+ SmallVectorImpl<SDValue> &Inputs,
+ SmallVectorImpl<int> &Mask,
+ const SelectionDAG &DAG);
+
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements to the
// destination value type.
@@ -6200,6 +6523,78 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
Ops.push_back(IsAndN ? N1 : N0);
return true;
}
+ case ISD::OR: {
+ // Handle the OR(SHUFFLE,SHUFFLE) case where, for each element, one source
+ // is zero and the other provides a valid shuffle index.
+ SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+ SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
+ if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
+ return false;
+ SmallVector<int, 64> SrcMask0, SrcMask1;
+ SmallVector<SDValue, 2> SrcInputs0, SrcInputs1;
+ if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG) ||
+ !resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG))
+ return false;
+ int MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
+ SmallVector<int, 64> Mask0, Mask1;
+ scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
+ scaleShuffleMask<int>(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
+ for (int i = 0; i != MaskSize; ++i) {
+ if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
+ Mask.push_back(SM_SentinelUndef);
+ else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
+ Mask.push_back(SM_SentinelZero);
+ else if (Mask1[i] == SM_SentinelZero)
+ Mask.push_back(Mask0[i]);
+ else if (Mask0[i] == SM_SentinelZero)
+ Mask.push_back(Mask1[i] + (MaskSize * SrcInputs0.size()));
+ else
+ return false;
+ }
+ for (SDValue &Op : SrcInputs0)
+ Ops.push_back(Op);
+ for (SDValue &Op : SrcInputs1)
+ Ops.push_back(Op);
+ return true;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(EXTRACT_SUBVECTOR(SRC1))) where
+ // SRC0/SRC1 are both of the same value type VT.
+ // TODO - add peekThroughOneUseBitcasts support.
+ SDValue Src = N.getOperand(0);
+ SDValue Sub = N.getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ if (!isa<ConstantSDNode>(N.getOperand(2)) ||
+ !N->isOnlyUserOf(Sub.getNode()))
+ return false;
+ SmallVector<int, 64> SubMask;
+ SmallVector<SDValue, 2> SubInputs;
+ if (!resolveTargetShuffleInputs(Sub, SubInputs, SubMask, DAG) ||
+ SubMask.size() != NumSubElts)
+ return false;
+ Ops.push_back(Src);
+ for (SDValue &SubInput : SubInputs) {
+ if (SubInput.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ SubInput.getOperand(0).getValueType() != VT ||
+ !isa<ConstantSDNode>(SubInput.getOperand(1)))
+ return false;
+ Ops.push_back(SubInput.getOperand(0));
+ }
+ int InsertIdx = N.getConstantOperandVal(2);
+ for (int i = 0; i != (int)NumElts; ++i)
+ Mask.push_back(i);
+ for (int i = 0; i != (int)NumSubElts; ++i) {
+ int M = SubMask[i];
+ if (0 <= M) {
+ int InputIdx = M / NumSubElts;
+ int ExtractIdx = SubInputs[InputIdx].getConstantOperandVal(1);
+ M = (NumElts * (1 + InputIdx)) + ExtractIdx + (M % NumSubElts);
+ }
+ Mask[i + InsertIdx] = M;
+ }
+ return true;
+ }
case ISD::SCALAR_TO_VECTOR: {
// Match against a scalar_to_vector of an extract from a vector,
// for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
@@ -6334,14 +6729,14 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return true;
}
case ISD::ZERO_EXTEND_VECTOR_INREG:
- case X86ISD::VZEXT: {
+ case ISD::ZERO_EXTEND: {
// TODO - add support for VPMOVZX with smaller input vector types.
SDValue Src = N.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
if (NumSizeInBits != SrcVT.getSizeInBits())
break;
- DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), VT.getScalarSizeInBits(),
- VT.getVectorNumElements(), Mask);
+ DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts,
+ Mask);
Ops.push_back(Src);
return true;
}
@@ -6586,6 +6981,26 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ // If this is a splat of a pair of elements, use MOVDDUP (unless the target
+ // has XOP; in that case defer lowering to potentially use VPERMIL2PS).
+ // Because we're creating a less complicated build vector here, we may enable
+ // further folding of the MOVDDUP via shuffle transforms.
+ if (Subtarget.hasSSE3() && !Subtarget.hasXOP() &&
+ Op.getOperand(0) == Op.getOperand(2) &&
+ Op.getOperand(1) == Op.getOperand(3) &&
+ Op.getOperand(0) != Op.getOperand(1)) {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ // Create a new build vector with the first 2 elements followed by undef
+ // padding, bitcast to v2f64, duplicate, and bitcast back.
+ SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
+ DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
+ SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops));
+ SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV);
+ return DAG.getBitcast(VT, Dup);
+ }
+
// Find all zeroable elements.
std::bitset<4> Zeroable;
for (int i=0; i < 4; ++i) {
@@ -7059,9 +7474,9 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
}
}
- // We need a splat of a single value to use broadcast, and it doesn't
- // make any sense if the value is only in one element of the vector.
- if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumUndefElts = UndefElements.count();
+ if (!Ld || (NumElts - NumUndefElts) <= 1) {
APInt SplatValue, Undef;
unsigned SplatBitSize;
bool HasUndef;
@@ -7137,7 +7552,17 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
}
}
}
- return SDValue();
+
+ // If we are moving a scalar into a vector (Ld must be set and all elements
+ // but 1 are undef) and that operation is not obviously supported by
+ // vmovd/vmovq/vmovss/vmovsd, then keep trying to form a broadcast.
+ // That's better than general shuffling and may eliminate a load to GPR and
+ // move from scalar to vector register.
+ if (!Ld || NumElts - NumUndefElts != 1)
+ return SDValue();
+ unsigned ScalarSize = Ld.getValueSizeInBits();
+ if (!(UndefElements[0] || (ScalarSize != 32 && ScalarSize != 64)))
+ return SDValue();
}
bool ConstSplatVal =
@@ -7434,13 +7859,14 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
return DstVec;
}
-/// Return true if \p N implements a horizontal binop and return the
-/// operands for the horizontal binop into V0 and V1.
-///
/// This is a helper function of LowerToHorizontalOp().
/// This function checks that the build_vector \p N in input implements a
-/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
-/// operation to match.
+/// 128-bit partial horizontal operation on a 256-bit vector, but that operation
+/// may not match the layout of an x86 256-bit horizontal instruction.
+/// In other words, if this returns true, then some extraction/insertion will
+/// be required to produce a valid horizontal instruction.
+///
+/// Parameter \p Opcode defines the kind of horizontal operation to match.
/// For example, if \p Opcode is equal to ISD::ADD, then this function
/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
/// is equal to ISD::SUB, then this function checks if this is a horizontal
@@ -7448,12 +7874,17 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
///
/// This function only analyzes elements of \p N whose indices are
/// in range [BaseIdx, LastIdx).
-static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
- SelectionDAG &DAG,
- unsigned BaseIdx, unsigned LastIdx,
- SDValue &V0, SDValue &V1) {
+///
+/// TODO: This function was originally used to match both real and fake partial
+/// horizontal operations, but the index-matching logic is incorrect for that.
+/// See the corrected implementation in isHopBuildVector(). Can we reduce this
+/// code because it is only used for partial h-op matching now?
+static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
+ SelectionDAG &DAG,
+ unsigned BaseIdx, unsigned LastIdx,
+ SDValue &V0, SDValue &V1) {
EVT VT = N->getValueType(0);
-
+ assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops");
assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
"Invalid Vector in input!");
@@ -7623,7 +8054,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
// adding/subtracting two integer/float elements.
// Even-numbered elements in the input build vector are obtained from
// subtracting/adding two integer/float elements.
- unsigned Opc[2] {0, 0};
+ unsigned Opc[2] = {0, 0};
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Op = BV->getOperand(i);
@@ -7794,17 +8225,158 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
+static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
+ unsigned &HOpcode, SDValue &V0, SDValue &V1) {
+ // Initialize outputs to known values.
+ MVT VT = BV->getSimpleValueType(0);
+ HOpcode = ISD::DELETED_NODE;
+ V0 = DAG.getUNDEF(VT);
+ V1 = DAG.getUNDEF(VT);
+
+ // x86 256-bit horizontal ops are defined in a non-obvious way. Each 128-bit
+ // half of the result is calculated independently from the 128-bit halves of
+ // the inputs, so that makes the index-checking logic below more complicated.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned GenericOpcode = ISD::DELETED_NODE;
+ unsigned Num128BitChunks = VT.is256BitVector() ? 2 : 1;
+ unsigned NumEltsIn128Bits = NumElts / Num128BitChunks;
+ unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2;
+ for (unsigned i = 0; i != Num128BitChunks; ++i) {
+ for (unsigned j = 0; j != NumEltsIn128Bits; ++j) {
+ // Ignore undef elements.
+ SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j);
+ if (Op.isUndef())
+ continue;
+
+ // If there's an opcode mismatch, we're done.
+ if (HOpcode != ISD::DELETED_NODE && Op.getOpcode() != GenericOpcode)
+ return false;
+
+ // Initialize horizontal opcode.
+ if (HOpcode == ISD::DELETED_NODE) {
+ GenericOpcode = Op.getOpcode();
+ switch (GenericOpcode) {
+ case ISD::ADD: HOpcode = X86ISD::HADD; break;
+ case ISD::SUB: HOpcode = X86ISD::HSUB; break;
+ case ISD::FADD: HOpcode = X86ISD::FHADD; break;
+ case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
+ default: return false;
+ }
+ }
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Op0.getOperand(0) != Op1.getOperand(0) ||
+ !isa<ConstantSDNode>(Op0.getOperand(1)) ||
+ !isa<ConstantSDNode>(Op1.getOperand(1)) || !Op.hasOneUse())
+ return false;
+
+ // The source vector is chosen based on which 64-bit half of the
+ // destination vector is being calculated.
+ if (j < NumEltsIn64Bits) {
+ if (V0.isUndef())
+ V0 = Op0.getOperand(0);
+ } else {
+ if (V1.isUndef())
+ V1 = Op0.getOperand(0);
+ }
+
+ SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1;
+ if (SourceVec != Op0.getOperand(0))
+ return false;
+
+ // op (extract_vector_elt A, I), (extract_vector_elt A, I+1)
+ unsigned ExtIndex0 = Op0.getConstantOperandVal(1);
+ unsigned ExtIndex1 = Op1.getConstantOperandVal(1);
+ unsigned ExpectedIndex = i * NumEltsIn128Bits +
+ (j % NumEltsIn64Bits) * 2;
+ if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1)
+ continue;
+
+ // If this is not a commutative op, this does not match.
+ if (GenericOpcode != ISD::ADD && GenericOpcode != ISD::FADD)
+ return false;
+
+ // Addition is commutative, so try swapping the extract indexes.
+ // op (extract_vector_elt A, I+1), (extract_vector_elt A, I)
+ if (ExpectedIndex == ExtIndex1 && ExtIndex0 == ExtIndex1 + 1)
+ continue;
+
+ // Extract indexes do not match horizontal requirement.
+ return false;
+ }
+ }
+ // We matched. Opcode and operands are returned by reference as arguments.
+ return true;
+}
+
+static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
+ SelectionDAG &DAG, unsigned HOpcode,
+ SDValue V0, SDValue V1) {
+ // If either input vector is not the same size as the build vector,
+ // extract/insert the low bits to the correct size.
+ // This is free (examples: zmm --> xmm, xmm --> ymm).
+ MVT VT = BV->getSimpleValueType(0);
+ unsigned Width = VT.getSizeInBits();
+ if (V0.getValueSizeInBits() > Width)
+ V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), Width);
+ else if (V0.getValueSizeInBits() < Width)
+ V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, SDLoc(BV), Width);
+
+ if (V1.getValueSizeInBits() > Width)
+ V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), Width);
+ else if (V1.getValueSizeInBits() < Width)
+ V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
+
+ return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);
+}
+
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ // We need at least 2 non-undef elements to make this worthwhile by default.
+ unsigned NumNonUndefs = 0;
+ for (const SDValue &V : BV->op_values())
+ if (!V.isUndef())
+ ++NumNonUndefs;
+
+ if (NumNonUndefs < 2)
+ return SDValue();
+
+ // There are 4 sets of horizontal math operations distinguished by type:
+ // int/FP at 128-bit/256-bit. Each type was introduced with a different
+ // subtarget feature. Try to match those "native" patterns first.
MVT VT = BV->getSimpleValueType(0);
+ unsigned HOpcode;
+ SDValue V0, V1;
+ if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ if ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ if ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ if ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ // Try harder to match 256-bit ops by using extract/concat.
+ if (!Subtarget.hasAVX() || !VT.is256BitVector())
+ return SDValue();
+
+ // Count the number of UNDEF operands in the input build_vector.
unsigned NumElts = VT.getVectorNumElements();
+ unsigned Half = NumElts / 2;
unsigned NumUndefsLO = 0;
unsigned NumUndefsHI = 0;
- unsigned Half = NumElts/2;
-
- // Count the number of UNDEF operands in the build_vector in input.
for (unsigned i = 0, e = Half; i != e; ++i)
if (BV->getOperand(i)->isUndef())
NumUndefsLO++;
@@ -7813,96 +8385,61 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
if (BV->getOperand(i)->isUndef())
NumUndefsHI++;
- // Early exit if this is either a build_vector of all UNDEFs or all the
- // operands but one are UNDEF.
- if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
- return SDValue();
-
SDLoc DL(BV);
SDValue InVec0, InVec1;
- if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) {
- // Try to match an SSE3 float HADD/HSUB.
- if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
-
- if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
- } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) {
- // Try to match an SSSE3 integer HADD/HSUB.
- if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
-
- if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
- }
-
- if (!Subtarget.hasAVX())
- return SDValue();
-
- if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
- // Try to match an AVX horizontal add/sub of packed single/double
- // precision floating point values from 256-bit vectors.
- SDValue InVec2, InVec3;
- if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
-
- if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
- } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
- // Try to match an AVX2 horizontal add/sub of signed integers.
+ if (VT == MVT::v8i32 || VT == MVT::v16i16) {
SDValue InVec2, InVec3;
unsigned X86Opcode;
bool CanFold = true;
- if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
+ if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOpPart(BV, ISD::ADD, DAG, Half, NumElts, InVec2,
+ InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
+ else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, Half, InVec0,
+ InVec1) &&
+ isHorizontalBinOpPart(BV, ISD::SUB, DAG, Half, NumElts, InVec2,
+ InVec3) &&
+ ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
+ ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HSUB;
else
CanFold = false;
if (CanFold) {
- // Fold this build_vector into a single horizontal add/sub.
- // Do this only if the target has AVX2.
- if (Subtarget.hasAVX2())
- return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
-
// Do not try to expand this build_vector into a pair of horizontal
// add/sub if we can emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
return SDValue();
- // Convert this build_vector into a pair of horizontal binop followed by
- // a concat vector.
+ // Convert this build_vector into a pair of horizontal binops followed by
+ // a concat vector. We must adjust the outputs from the partial horizontal
+ // matching calls above to account for undefined vector halves.
+ SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
+ SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
+ assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
bool isUndefLO = NumUndefsLO == Half;
bool isUndefHI = NumUndefsHI == Half;
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
- isUndefLO, isUndefHI);
+ return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
+ isUndefHI);
}
}
- if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
- VT == MVT::v16i16) && Subtarget.hasAVX()) {
+ if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
+ VT == MVT::v16i16) {
unsigned X86Opcode;
- if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
+ if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
+ else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, NumElts, InVec0,
+ InVec1))
X86Opcode = X86ISD::HSUB;
- else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
+ else if (isHorizontalBinOpPart(BV, ISD::FADD, DAG, 0, NumElts, InVec0,
+ InVec1))
X86Opcode = X86ISD::FHADD;
- else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
+ else if (isHorizontalBinOpPart(BV, ISD::FSUB, DAG, 0, NumElts, InVec0,
+ InVec1))
X86Opcode = X86ISD::FHSUB;
else
return SDValue();
@@ -8370,9 +8907,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If we are inserting one variable into a vector of non-zero constants, try
// to avoid loading each constant element as a scalar. Load the constants as a
// vector and then insert the variable scalar element. If insertion is not
- // supported, we assume that we will fall back to a shuffle to get the scalar
- // blended with the constants. Insertion into a zero vector is handled as a
- // special-case somewhere below here.
+ // supported, fall back to a shuffle to get the scalar blended with the
+ // constants. Insertion into a zero vector is handled as a special-case
+ // somewhere below here.
if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
(isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
@@ -8410,7 +8947,21 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);
+ unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue();
+ unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits();
+ if (InsertC < NumEltsInLow128Bits)
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);
+
+ // There's no good way to insert into the high elements of a >128-bit
+ // vector, so use shuffles to avoid an extract/insert sequence.
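+ // For example (illustrative), inserting into element 5 of a v8i32 uses the
+ // shuffle mask {0,1,2,3,4,8,6,7}, where index 8 selects the scalar that was
+ // placed into element 0 of S2V.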
+ assert(VT.getSizeInBits() > 128 && "Invalid insertion index?");
+ assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector");
+ SmallVector<int, 8> ShuffleMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShuffleMask.push_back(i == InsertC ? NumElts : i);
+ SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt);
+ return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask);
}
// Special case for single non-zero, non-undef, element.
@@ -9097,6 +9648,28 @@ static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
return TargetMask;
}
+// Attempt to create a shuffle mask from a VSELECT condition mask.
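+// For example (illustrative), a v4i32 condition <-1, 0, undef, -1> produces
+// the shuffle mask {0, 5, 6, 3}: true lanes select from the 1st operand and
+// false/undef lanes select from the 2nd operand.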
+static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
+ SDValue Cond) {
+ if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
+ return false;
+
+ unsigned Size = Cond.getValueType().getVectorNumElements();
+ Mask.resize(Size, SM_SentinelUndef);
+
+ for (int i = 0; i != (int)Size; ++i) {
+ SDValue CondElt = Cond.getOperand(i);
+ Mask[i] = i;
+ // Arbitrarily choose from the 2nd operand if the select condition element
+ // is undef.
+ // TODO: Can we do better by matching patterns such as even/odd?
+ if (CondElt.isUndef() || isNullConstant(CondElt))
+ Mask[i] += Size;
+ }
+
+ return true;
+}
+
// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
// instructions.
static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
@@ -9664,11 +10237,7 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps);
V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
- // We have to cast V2 around.
- MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
- V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
- DAG.getBitcast(MaskVT, V1Mask),
- DAG.getBitcast(MaskVT, V2)));
+ V2 = DAG.getNode(X86ISD::ANDNP, DL, VT, V1Mask, V2);
return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}
@@ -9762,7 +10331,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
case MVT::v8f32:
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
-
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
@@ -9794,7 +10362,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
-
case MVT::v16i16: {
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
@@ -9808,6 +10375,20 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
}
+ // Use PBLENDW for lower/upper lanes and then blend lanes.
+ // TODO - we should allow 2 PBLENDW here and leave shuffle combine to
+ // merge to VSELECT where useful.
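+ // For example (illustrative), BlendMask = 0x00E1 has HiMask == 0, so the
+ // upper lane is taken unchanged from V1 and only the lower lane needs a
+ // 16-bit blend.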
+ uint64_t LoMask = BlendMask & 0xFF;
+ uint64_t HiMask = (BlendMask >> 8) & 0xFF;
+ if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
+ SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
+ DAG.getConstant(LoMask, DL, MVT::i8));
+ SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
+ DAG.getConstant(HiMask, DL, MVT::i8));
+ return DAG.getVectorShuffle(
+ MVT::v16i16, DL, Lo, Hi,
+ {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
+ }
LLVM_FALLTHROUGH;
}
case MVT::v16i8:
@@ -9815,6 +10396,11 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
+ // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
+ return Masked;
+
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
@@ -9822,11 +10408,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
- // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
- if (SDValue Masked =
- lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
- return Masked;
-
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
@@ -9834,6 +10415,15 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
// type.
MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ // x86 allows load folding with blendvb from the 2nd source operand. But
+ // we are still using LLVM select here (see comment below), so that's V1.
+ // If V2 can be load-folded and V1 cannot be load-folded, then commute to
+ // allow that load-folding possibility.
+ if (!ISD::isNormalLoad(V1.getNode()) && ISD::isNormalLoad(V2.getNode())) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(V1, V2);
+ }
+
// Compute the VSELECT mask. Note that VSELECT is really confusing in the
// mix of LLVM's code generator and the x86 backend. We tell the code
// generator that boolean values in the elements of an x86 vector register
@@ -9884,7 +10474,8 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ bool ImmBlends = false) {
// We build up the blend mask while checking whether a blend is a viable way
// to reduce the shuffle.
SmallVector<int, 32> BlendMask(Mask.size(), -1);
@@ -9904,10 +10495,168 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
PermuteMask[i] = Mask[i] % Size;
}
+ // If we are limited to immediate blends, bail if the blend mask can't be
+ // widened to i16.
+ unsigned EltSize = VT.getScalarSizeInBits();
+ if (ImmBlends && EltSize == 8 && !canWidenShuffleElements(BlendMask))
+ return SDValue();
+
SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
}
+/// Try to lower as an unpack of elements from two inputs followed by
+/// a single-input permutation.
+///
+/// This matches the pattern where we can unpack elements from two inputs and
+/// then reduce the shuffle to a single-input (wider) permutation.
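+///
+/// For example (illustrative), the v8i16 mask {0,8,2,10,1,9,3,11} becomes
+/// UNPCKL(V1,V2) = {0,8,1,9,2,10,3,11} followed by the single-input permute
+/// {0,1,4,5,2,3,6,7}.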
+static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ int NumElts = Mask.size();
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumLaneElts = NumElts / NumLanes;
+ int NumHalfLaneElts = NumLaneElts / 2;
+
+ bool MatchLo = true, MatchHi = true;
+ SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
+
+ // Determine UNPCKL/UNPCKH type and operand order.
+ for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
+ for (int Elt = 0; Elt != NumLaneElts; ++Elt) {
+ int M = Mask[Lane + Elt];
+ if (M < 0)
+ continue;
+
+ SDValue &Op = Ops[Elt & 1];
+ if (M < NumElts && (Op.isUndef() || Op == V1))
+ Op = V1;
+ else if (NumElts <= M && (Op.isUndef() || Op == V2))
+ Op = V2;
+ else
+ return SDValue();
+
+ int Lo = Lane, Mid = Lane + NumHalfLaneElts, Hi = Lane + NumLaneElts;
+ MatchLo &= isUndefOrInRange(M, Lo, Mid) ||
+ isUndefOrInRange(M, NumElts + Lo, NumElts + Mid);
+ MatchHi &= isUndefOrInRange(M, Mid, Hi) ||
+ isUndefOrInRange(M, NumElts + Mid, NumElts + Hi);
+ if (!MatchLo && !MatchHi)
+ return SDValue();
+ }
+ }
+ assert((MatchLo ^ MatchHi) && "Failed to match UNPCKLO/UNPCKHI");
+
+ // Now check that each pair of elts comes from the same unpack pair
+ // and set the permute mask based on each pair.
+ // TODO - Investigate cases where we permute individual elements.
+ SmallVector<int, 32> PermuteMask(NumElts, -1);
+ for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
+ for (int Elt = 0; Elt != NumLaneElts; Elt += 2) {
+ int M0 = Mask[Lane + Elt + 0];
+ int M1 = Mask[Lane + Elt + 1];
+ if (0 <= M0 && 0 <= M1 &&
+ (M0 % NumHalfLaneElts) != (M1 % NumHalfLaneElts))
+ return SDValue();
+ if (0 <= M0)
+ PermuteMask[Lane + Elt + 0] = Lane + (2 * (M0 % NumHalfLaneElts));
+ if (0 <= M1)
+ PermuteMask[Lane + Elt + 1] = Lane + (2 * (M1 % NumHalfLaneElts)) + 1;
+ }
+ }
+
+ unsigned UnpckOp = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+ SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops);
+ return DAG.getVectorShuffle(VT, DL, Unpck, DAG.getUNDEF(VT), PermuteMask);
+}
+
+/// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then
+/// permuting the elements of the result in place.
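+///
+/// For example (illustrative), if every element needed from V1 sits at or
+/// above some in-lane index and every element needed from V2 sits below it
+/// (or vice versa), one PALIGNR gathers both ranges into a single register,
+/// leaving just an in-place permute of the result.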
+static SDValue lowerVectorShuffleAsByteRotateAndPermute(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+ if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||
+ (VT.is256BitVector() && !Subtarget.hasAVX2()) ||
+ (VT.is512BitVector() && !Subtarget.hasBWI()))
+ return SDValue();
+
+ // We don't currently support lane crossing permutes.
+ if (is128BitLaneCrossingShuffleMask(VT, Mask))
+ return SDValue();
+
+ int Scale = VT.getScalarSizeInBits() / 8;
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumElts = VT.getVectorNumElements();
+ int NumEltsPerLane = NumElts / NumLanes;
+
+ // Determine range of mask elts.
+ bool Blend1 = true;
+ bool Blend2 = true;
+ std::pair<int, int> Range1 = std::make_pair(INT_MAX, INT_MIN);
+ std::pair<int, int> Range2 = std::make_pair(INT_MAX, INT_MIN);
+ for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
+ for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
+ int M = Mask[Lane + Elt];
+ if (M < 0)
+ continue;
+ if (M < NumElts) {
+ Blend1 &= (M == (Lane + Elt));
+ assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
+ M = M % NumEltsPerLane;
+ Range1.first = std::min(Range1.first, M);
+ Range1.second = std::max(Range1.second, M);
+ } else {
+ M -= NumElts;
+ Blend2 &= (M == (Lane + Elt));
+ assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
+ M = M % NumEltsPerLane;
+ Range2.first = std::min(Range2.first, M);
+ Range2.second = std::max(Range2.second, M);
+ }
+ }
+ }
+
+ // Bail if we don't need elements from both inputs.
+ // TODO - it might be worth doing this for unary shuffles if the permute
+ // can be widened.
+ if (!(0 <= Range1.first && Range1.second < NumEltsPerLane) ||
+ !(0 <= Range2.first && Range2.second < NumEltsPerLane))
+ return SDValue();
+
+ if (VT.getSizeInBits() > 128 && (Blend1 || Blend2))
+ return SDValue();
+
+ // Rotate the 2 ops so we can access both ranges, then permute the result.
+ auto RotateAndPermute = [&](SDValue Lo, SDValue Hi, int RotAmt, int Ofs) {
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ SDValue Rotate = DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),
+ DAG.getBitcast(ByteVT, Lo),
+ DAG.getConstant(Scale * RotAmt, DL, MVT::i8)));
+ SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef);
+ for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
+ for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
+ int M = Mask[Lane + Elt];
+ if (M < 0)
+ continue;
+ if (M < NumElts)
+ PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane);
+ else
+ PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane);
+ }
+ }
+ return DAG.getVectorShuffle(VT, DL, Rotate, DAG.getUNDEF(VT), PermMask);
+ };
+
+ // Check if the ranges are small enough to rotate from either direction.
+ if (Range2.second < Range1.first)
+ return RotateAndPermute(V1, V2, Range1.first, 0);
+ if (Range1.second < Range2.first)
+ return RotateAndPermute(V2, V1, Range2.first, NumElts);
+ return SDValue();
+}
+
/// Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
@@ -9915,11 +10664,9 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
/// operations. It will try to pick the best arrangement of shuffles and
/// blends.
-static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
- MVT VT, SDValue V1,
- SDValue V2,
- ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
// Shuffle the input elements into the desired positions in V1 and V2 and
// blend them together.
SmallVector<int, 32> V1Mask(Mask.size(), -1);
@@ -9934,15 +10681,27 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
BlendMask[i] = i + Size;
}
- // Try to lower with the simpler initial blend strategy unless one of the
- // input shuffles would be a no-op. We prefer to shuffle inputs as the
- // shuffle may be able to fold with a load or other benefit. However, when
- // we'll have to do 2x as many shuffles in order to achieve this, blending
- // first is a better strategy.
- if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask))
+ // Try to lower with the simpler initial blend/unpack/rotate strategies unless
+ // one of the input shuffles would be a no-op. We prefer to shuffle the inputs
+ // because the shuffle may be able to fold with a load or provide some other
+ // benefit. However, when we would have to do twice as many shuffles to achieve
+ // this, doing a 2-input pre-shuffle first is the better strategy.
+ if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) {
+ // Only prefer immediate blends to unpack/rotate.
+ if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
+ DL, VT, V1, V2, Mask, DAG, true))
+ return BlendPerm;
+ if (SDValue UnpackPerm =
+ lowerVectorShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG))
+ return UnpackPerm;
+ if (SDValue RotatePerm = lowerVectorShuffleAsByteRotateAndPermute(
+ DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return RotatePerm;
+ // Unpack/rotate failed - try again with variable blends.
if (SDValue BlendPerm =
lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
return BlendPerm;
+ }
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
@@ -10452,7 +11211,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
- InputV = getExtendInVec(X86ISD::VZEXT, DL, ExtVT, InputV, DAG);
+ InputV = getExtendInVec(/*Signed*/false, DL, ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
@@ -10930,7 +11689,8 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
continue;
}
case ISD::CONCAT_VECTORS: {
- int OperandSize = Mask.size() / V.getNumOperands();
+ int OperandSize =
+ V.getOperand(0).getSimpleValueType().getVectorNumElements();
V = V.getOperand(BroadcastIdx / OperandSize);
BroadcastIdx %= OperandSize;
continue;
@@ -10989,7 +11749,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
SDValue BC = peekThroughBitcasts(V);
// Also check the simpler case, where we can directly reuse the scalar.
- if (V.getOpcode() == ISD::BUILD_VECTOR ||
+ if ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
@@ -11204,10 +11964,9 @@ static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
/// because for floating point vectors we have a generalized SHUFPS lowering
/// strategy that handles everything that doesn't *exactly* match an unpack,
/// making this clever lowering unnecessary.
-static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsPermuteAndUnpack(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() &&
"This routine only supports integer vectors.");
assert(VT.is128BitVector() &&
@@ -11276,6 +12035,12 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
return Unpack;
+ // If we're shuffling with a zero vector then we're better off not doing
+ // VECTOR_SHUFFLE(UNPCK()) as we lose track of those zero elements.
+ if (ISD::isBuildVectorAllZeros(V1.getNode()) ||
+ ISD::isBuildVectorAllZeros(V2.getNode()))
+ return SDValue();
+
// If none of the unpack-rooted lowerings worked (or were profitable) try an
// initial unpack.
if (NumLoInputs == 0 || NumHiInputs == 0) {
@@ -11475,7 +12240,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
@@ -11785,11 +12550,11 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
- DL, MVT::v4i32, V1, V2, Mask, DAG))
+ DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Unpack;
}
@@ -12321,47 +13086,48 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
/// blend if only one input is used.
static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse,
- bool &V2InUse) {
- SDValue V1Mask[16];
- SDValue V2Mask[16];
+ const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) {
+ assert(!is128BitLaneCrossingShuffleMask(VT, Mask) &&
+ "Lane crossing shuffle masks not supported");
+
+ int NumBytes = VT.getSizeInBits() / 8;
+ int Size = Mask.size();
+ int Scale = NumBytes / Size;
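+ // For example (illustrative), a v8i16 shuffle of a 128-bit vector has
+ // NumBytes = 16 and Scale = 2, so each mask element expands to two
+ // consecutive byte indices in the PSHUFB control vector.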
+
+ SmallVector<SDValue, 64> V1Mask(NumBytes, DAG.getUNDEF(MVT::i8));
+ SmallVector<SDValue, 64> V2Mask(NumBytes, DAG.getUNDEF(MVT::i8));
V1InUse = false;
V2InUse = false;
- int Size = Mask.size();
- int Scale = 16 / Size;
- for (int i = 0; i < 16; ++i) {
- if (Mask[i / Scale] < 0) {
- V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
- } else {
- const int ZeroMask = 0x80;
- int V1Idx = Mask[i / Scale] < Size ? Mask[i / Scale] * Scale + i % Scale
- : ZeroMask;
- int V2Idx = Mask[i / Scale] < Size
- ? ZeroMask
- : (Mask[i / Scale] - Size) * Scale + i % Scale;
- if (Zeroable[i / Scale])
- V1Idx = V2Idx = ZeroMask;
- V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
- V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
- V1InUse |= (ZeroMask != V1Idx);
- V2InUse |= (ZeroMask != V2Idx);
- }
+ for (int i = 0; i < NumBytes; ++i) {
+ int M = Mask[i / Scale];
+ if (M < 0)
+ continue;
+
+ const int ZeroMask = 0x80;
+ int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask;
+ int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale;
+ if (Zeroable[i / Scale])
+ V1Idx = V2Idx = ZeroMask;
+
+ V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
+ V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
+ V1InUse |= (ZeroMask != V1Idx);
+ V2InUse |= (ZeroMask != V2Idx);
}
+ MVT ShufVT = MVT::getVectorVT(MVT::i8, NumBytes);
if (V1InUse)
- V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- DAG.getBitcast(MVT::v16i8, V1),
- DAG.getBuildVector(MVT::v16i8, DL, V1Mask));
+ V1 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V1),
+ DAG.getBuildVector(ShufVT, DL, V1Mask));
if (V2InUse)
- V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- DAG.getBitcast(MVT::v16i8, V2),
- DAG.getBuildVector(MVT::v16i8, DL, V2Mask));
+ V2 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V2),
+ DAG.getBuildVector(ShufVT, DL, V2Mask));
// If we need shuffled inputs from both, blend the two.
SDValue V;
if (V1InUse && V2InUse)
- V = DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
+ V = DAG.getNode(ISD::OR, DL, ShufVT, V1, V2);
else
V = V1InUse ? V1 : V2;
@@ -12484,8 +13250,8 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return BitBlend;
// Try to lower by permuting the inputs into an unpack instruction.
- if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1,
- V2, Mask, DAG))
+ if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
+ DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return Unpack;
// If we can't directly blend but can use PSHUFB, that will be better as it
@@ -12499,7 +13265,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// We can always bit-blend if we have to so the fallback strategy is to
// decompose into single-input permutes and blends.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
}
/// Check whether a compaction lowering can be done by dropping even
@@ -12632,6 +13398,10 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+ return V;
+
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
// Notably, this handles splat and partial-splat shuffles more efficiently.
// However, it only makes sense if the pre-duplication shuffle simplifies
@@ -12769,12 +13539,18 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// shuffles will both be pshufb, in which case we shouldn't bother with
// this.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
- DL, MVT::v16i8, V1, V2, Mask, DAG))
+ DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Unpack;
// If we have VBMI we can use one VPERM instead of multiple PSHUFBs.
if (Subtarget.hasVBMI() && Subtarget.hasVLX())
return lowerVectorShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG);
+
+ // Use PALIGNR+Permute if possible - permute might become PSHUFB but the
+ // PALIGNR will be cheaper than the second PSHUFB+OR.
+ if (SDValue V = lowerVectorShuffleAsByteRotateAndPermute(
+ DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+ return V;
}
return PSHUFB;
@@ -12830,7 +13606,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Handle multi-input cases by blending single-input shuffles.
if (NumV2Elements > 0)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// The fallback path for single-input shuffles widens this into two v8i16
// vectors with unpacks, shuffles those, and then pulls them back together
@@ -13043,6 +13819,7 @@ static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!V2.isUndef() && "This routine must not be used to lower single-input "
"shuffles as it could then recurse on itself.");
@@ -13069,7 +13846,7 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
};
if (DoBothBroadcast())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
- DAG);
+ Subtarget, DAG);
// If the inputs all stem from a single 128-bit lane of each input, then we
// split them rather than blending because the split will decompose to
@@ -13087,7 +13864,62 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
// Otherwise, just fall back to decomposed shuffles and a blend. This requires
// that the decomposed single-input shuffles don't end up here.
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
+ Subtarget, DAG);
+}
+
+/// Lower a vector shuffle crossing multiple 128-bit lanes as
+/// a lane permutation followed by a per-lane permutation.
+///
+/// This is mainly for cases where we can have non-repeating permutes
+/// in each lane.
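+/// For example (illustrative), the v4f64 mask {3,2,0,1} becomes the cross-lane
+/// permutation {2,3,0,1} followed by the per-lane permutation {1,0,2,3}, where
+/// the in-lane pattern differs between the two lanes.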
+///
+/// TODO: This is very similar to lowerVectorShuffleByMerging128BitLanes,
+/// we should investigate merging them.
+static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumEltsPerLane = NumElts / NumLanes;
+
+ SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
+ SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
+ SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+
+ // Ensure that each lane comes from a single source lane.
+ int SrcLane = M / NumEltsPerLane;
+ int DstLane = i / NumEltsPerLane;
+ if (!isUndefOrEqual(SrcLaneMask[DstLane], SrcLane))
+ return SDValue();
+ SrcLaneMask[DstLane] = SrcLane;
+
+ LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
+ PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
+ }
+
+ // If we're only shuffling the lowest lane and the rest of the lanes are
+ // identity, then don't bother.
+ // TODO - isShuffleMaskInputInPlace could be extended to something like this.
+ int NumIdentityLanes = 0;
+ bool OnlyShuffleLowestLane = true;
+ for (int i = 0; i != NumLanes; ++i) {
+ if (isSequentialOrUndefInRange(PermMask, i * NumEltsPerLane, NumEltsPerLane,
+ i * NumEltsPerLane))
+ NumIdentityLanes++;
+ else if (SrcLaneMask[i] != 0 && SrcLaneMask[i] != NumLanes)
+ OnlyShuffleLowestLane = false;
+ }
+ if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
+ return SDValue();
+
+ SDValue LanePermute = DAG.getVectorShuffle(VT, DL, V1, V2, LaneMask);
+ return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask);
}
/// Lower a vector shuffle crossing multiple 128-bit lanes as
@@ -13248,79 +14080,174 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
/// Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
-/// This will only succeed when the result of fixing the 128-bit lanes results
-/// in a single-input non-lane-crossing shuffle with a repeating shuffle mask in
-/// each 128-bit lanes. This handles many cases where we can quickly blend away
-/// the lane crosses early and then use simpler shuffles within each lane.
+/// This attempts to create a repeated lane shuffle where each lane uses one
+/// or two of the lanes of the inputs. The lanes of the input vectors are
+/// shuffled in one or two independent shuffles to get the lanes into the
+/// position needed by the final shuffle.
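+///
+/// For example (illustrative), the v8f32 mask {0,8,1,9,0,8,1,9} is handled by
+/// first splatting the low 128-bit lane of each input across both lanes and
+/// then applying the repeated in-lane mask {0,8,1,9} to those two results.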
///
-/// FIXME: It might be worthwhile at some point to support this without
-/// requiring the 128-bit lane-relative shuffles to be repeating, but currently
-/// in x86 only floating point has interesting non-repeating shuffles, and even
-/// those are still *marginally* more expensive.
+/// FIXME: This should be generalized to 512-bit shuffles.
static SDValue lowerVectorShuffleByMerging128BitLanes(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!V2.isUndef() && "This is only useful with multiple inputs.");
+ if (is128BitLaneRepeatedShuffleMask(VT, Mask))
+ return SDValue();
+
int Size = Mask.size();
int LaneSize = 128 / VT.getScalarSizeInBits();
int NumLanes = Size / LaneSize;
- assert(NumLanes > 1 && "Only handles 256-bit and wider shuffles.");
+ assert(NumLanes == 2 && "Only handles 256-bit shuffles.");
+
+ SmallVector<int, 16> RepeatMask(LaneSize, -1);
+ int LaneSrcs[2][2] = { { -1, -1 }, { -1 , -1 } };
+
+ // First pass will try to fill in the RepeatMask from lanes that need two
+ // sources.
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ int Srcs[2] = { -1, -1 };
+ SmallVector<int, 16> InLaneMask(LaneSize, -1);
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = Mask[(Lane * LaneSize) + i];
+ if (M < 0)
+ continue;
+ // Determine which of the 4 possible input lanes (2 from each source)
+ // this element comes from. Assign that as one of the sources for this
+ // lane. We can assign up to 2 sources for this lane. If we run out
+ // of sources, we can't do anything.
+ int LaneSrc = M / LaneSize;
+ int Src;
+ if (Srcs[0] < 0 || Srcs[0] == LaneSrc)
+ Src = 0;
+ else if (Srcs[1] < 0 || Srcs[1] == LaneSrc)
+ Src = 1;
+ else
+ return SDValue();
- // See if we can build a hypothetical 128-bit lane-fixing shuffle mask. Also
- // check whether the in-128-bit lane shuffles share a repeating pattern.
- SmallVector<int, 4> Lanes((unsigned)NumLanes, -1);
- SmallVector<int, 4> InLaneMask((unsigned)LaneSize, -1);
- for (int i = 0; i < Size; ++i) {
- if (Mask[i] < 0)
+ Srcs[Src] = LaneSrc;
+ InLaneMask[i] = (M % LaneSize) + Src * Size;
+ }
+
+ // If this lane has two sources, see if it fits with the repeat mask so far.
+ if (Srcs[1] < 0)
continue;
- int j = i / LaneSize;
+ LaneSrcs[Lane][0] = Srcs[0];
+ LaneSrcs[Lane][1] = Srcs[1];
- if (Lanes[j] < 0) {
- // First entry we've seen for this lane.
- Lanes[j] = Mask[i] / LaneSize;
- } else if (Lanes[j] != Mask[i] / LaneSize) {
- // This doesn't match the lane selected previously!
- return SDValue();
+ auto MatchMasks = [](ArrayRef<int> M1, ArrayRef<int> M2) {
+ assert(M1.size() == M2.size() && "Unexpected mask size");
+ for (int i = 0, e = M1.size(); i != e; ++i)
+ if (M1[i] >= 0 && M2[i] >= 0 && M1[i] != M2[i])
+ return false;
+ return true;
+ };
+
+ auto MergeMasks = [](ArrayRef<int> Mask, MutableArrayRef<int> MergedMask) {
+ assert(Mask.size() == MergedMask.size() && "Unexpected mask size");
+ for (int i = 0, e = MergedMask.size(); i != e; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ assert((MergedMask[i] < 0 || MergedMask[i] == M) &&
+ "Unexpected mask element");
+ MergedMask[i] = M;
+ }
+ };
+
+ if (MatchMasks(InLaneMask, RepeatMask)) {
+ // Merge this lane mask into the final repeat mask.
+ MergeMasks(InLaneMask, RepeatMask);
+ continue;
}
- // Check that within each lane we have a consistent shuffle mask.
- int k = i % LaneSize;
- if (InLaneMask[k] < 0) {
- InLaneMask[k] = Mask[i] % LaneSize;
- } else if (InLaneMask[k] != Mask[i] % LaneSize) {
- // This doesn't fit a repeating in-lane mask.
- return SDValue();
+ // Didn't find a match. Swap the operands and try again.
+ std::swap(LaneSrcs[Lane][0], LaneSrcs[Lane][1]);
+ ShuffleVectorSDNode::commuteMask(InLaneMask);
+
+ if (MatchMasks(InLaneMask, RepeatMask)) {
+ // Merge this lane mask into the final repeat mask.
+ MergeMasks(InLaneMask, RepeatMask);
+ continue;
}
+
+ // Couldn't find a match with the operands in either order.
+ return SDValue();
}
- // First shuffle the lanes into place.
- MVT LaneVT = MVT::getVectorVT(VT.isFloatingPoint() ? MVT::f64 : MVT::i64,
- VT.getSizeInBits() / 64);
- SmallVector<int, 8> LaneMask((unsigned)NumLanes * 2, -1);
- for (int i = 0; i < NumLanes; ++i)
- if (Lanes[i] >= 0) {
- LaneMask[2 * i + 0] = 2*Lanes[i] + 0;
- LaneMask[2 * i + 1] = 2*Lanes[i] + 1;
+ // Now handle any lanes with only one source.
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ // If this lane has already been processed, skip it.
+ if (LaneSrcs[Lane][0] >= 0)
+ continue;
+
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = Mask[(Lane * LaneSize) + i];
+ if (M < 0)
+ continue;
+
+ // If RepeatMask isn't defined yet, we can define it ourselves.
+ if (RepeatMask[i] < 0)
+ RepeatMask[i] = M % LaneSize;
+
+ if (RepeatMask[i] < Size) {
+ if (RepeatMask[i] != M % LaneSize)
+ return SDValue();
+ LaneSrcs[Lane][0] = M / LaneSize;
+ } else {
+ if (RepeatMask[i] != ((M % LaneSize) + Size))
+ return SDValue();
+ LaneSrcs[Lane][1] = M / LaneSize;
+ }
}
- V1 = DAG.getBitcast(LaneVT, V1);
- V2 = DAG.getBitcast(LaneVT, V2);
- SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask);
+ if (LaneSrcs[Lane][0] < 0 && LaneSrcs[Lane][1] < 0)
+ return SDValue();
+ }
+
+ SmallVector<int, 16> NewMask(Size, -1);
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ int Src = LaneSrcs[Lane][0];
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = -1;
+ if (Src >= 0)
+ M = Src * LaneSize + i;
+ NewMask[Lane * LaneSize + i] = M;
+ }
+ }
+ SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+ // Ensure we didn't get back the shuffle we started with.
+ // FIXME: This is a hack to make up for some splat handling code in
+ // getVectorShuffle.
+ if (isa<ShuffleVectorSDNode>(NewV1) &&
+ cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask)
+ return SDValue();
- // Cast it back to the type we actually want.
- LaneShuffle = DAG.getBitcast(VT, LaneShuffle);
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ int Src = LaneSrcs[Lane][1];
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = -1;
+ if (Src >= 0)
+ M = Src * LaneSize + i;
+ NewMask[Lane * LaneSize + i] = M;
+ }
+ }
+ SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+ // Ensure we didn't get back the shuffle we started with.
+ // FIXME: This is a hack to make up for some splat handling code in
+ // getVectorShuffle.
+ if (isa<ShuffleVectorSDNode>(NewV2) &&
+ cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)
+ return SDValue();
- // Now do a simple shuffle that isn't lane crossing.
- SmallVector<int, 8> NewMask((unsigned)Size, -1);
- for (int i = 0; i < Size; ++i)
- if (Mask[i] >= 0)
- NewMask[i] = (i / LaneSize) * LaneSize + Mask[i] % LaneSize;
- assert(!is128BitLaneCrossingShuffleMask(VT, NewMask) &&
- "Must not introduce lane crosses at this point!");
+ for (int i = 0; i != Size; ++i) {
+ NewMask[i] = RepeatMask[i % LaneSize];
+ if (NewMask[i] < 0)
+ continue;
- return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
+ NewMask[i] += (i / LaneSize) * LaneSize;
+ }
+ return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask);
}
/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
@@ -13731,6 +14658,11 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
+ // Try to permute the lanes and then use a per-lane permute.
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v4f64, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
DAG, Subtarget);
@@ -13765,6 +14697,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return Result;
+
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask,
@@ -13775,10 +14708,11 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 4-lane 64-bit integer shuffles.
@@ -13872,7 +14806,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Otherwise fall back on generic blend lowering.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
}
/// Handle lowering of 8-lane 32-bit floating point shuffles.
@@ -13961,17 +14895,18 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// vpunpckhwd instrs than vblend.
if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2,
- Mask, DAG))
+ Mask, Subtarget, DAG))
return V;
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 8-lane 32-bit integer shuffles.
@@ -14000,8 +14935,8 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// vpunpcklwd and vpunpckhwd instrs.
if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
!Subtarget.hasAVX512())
- if (SDValue V =
- lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, DAG))
+ if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2,
+ Mask, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
@@ -14084,7 +15019,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Otherwise fall back on generic blend lowering.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
}
/// Handle lowering of 16-lane 16-bit integer shuffles.
@@ -14146,9 +15081,14 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (V2.isUndef()) {
// There are no generalized cross-lane shuffle operations available on i16
// element types.
- if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask))
+ if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) {
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
Mask, DAG, Subtarget);
+ }
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
@@ -14174,8 +15114,14 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return Result;
+ // Try to permute the lanes and then use a per-lane permute.
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 32-lane 8-bit integer shuffles.
@@ -14236,9 +15182,14 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are no generalized cross-lane shuffle operations available on i8
// element types.
- if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
+ if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
DAG, Subtarget);
+ }
if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
@@ -14254,8 +15205,14 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Result;
+ // Try to permute the lanes and then use a per-lane permute.
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
+ Subtarget, DAG);
}
/// High-level routine to lower various 256-bit x86 vector shuffles.
@@ -14757,6 +15714,11 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
return V;
+ // Use dedicated pack instructions for masks that match their pattern.
+ if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
+ Subtarget))
+ return V;
+
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@@ -14845,6 +15807,39 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
}
+// Determine if this shuffle can be implemented with a KSHIFT instruction.
+// Returns the shift amount if possible or -1 if not. This is a simplified
+// version of matchVectorShuffleAsShift.
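+// For example (illustrative), a v16i1 shuffle whose mask is {2,3,...,15} in
+// the low 14 elements and whose top two elements are zeroable matches
+// X86ISD::KSHIFTR with a shift amount of 2.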
+static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
+ int MaskOffset, const APInt &Zeroable) {
+ int Size = Mask.size();
+
+ auto CheckZeros = [&](int Shift, bool Left) {
+ for (int j = 0; j < Shift; ++j)
+ if (!Zeroable[j + (Left ? 0 : (Size - Shift))])
+ return false;
+
+ return true;
+ };
+
+ auto MatchShift = [&](int Shift, bool Left) {
+ unsigned Pos = Left ? Shift : 0;
+ unsigned Low = Left ? 0 : Shift;
+ unsigned Len = Size - Shift;
+ return isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset);
+ };
+
+ for (int Shift = 1; Shift != Size; ++Shift)
+ for (bool Left : {true, false})
+ if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) {
+ Opcode = Left ? X86ISD::KSHIFTL : X86ISD::KSHIFTR;
+ return Shift;
+ }
+
+ return -1;
+}
+
// Lower vXi1 vector shuffles.
// There is no dedicated instruction on AVX-512 that shuffles the masks.
// The only way to shuffle bits is to sign-extend the mask vector to SIMD
@@ -14854,6 +15849,9 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ assert(Subtarget.hasAVX512() &&
+ "Cannot lower 512-bit vectors w/o basic ISA!");
+
unsigned NumElts = Mask.size();
// Try to recognize shuffles that are just padding a subvector with zeros.
@@ -14880,9 +15878,21 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
Extract, DAG.getIntPtrConstant(0, DL));
}
+ // Try to match KSHIFTs.
+ // TODO: Support narrower than legal shifts by widening and extracting.
+ if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) {
+ unsigned Offset = 0;
+ for (SDValue V : { V1, V2 }) {
+ unsigned Opcode;
+ int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
+ if (ShiftAmt >= 0)
+ return DAG.getNode(Opcode, DL, VT, V,
+ DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ Offset += NumElts; // Increment for next iteration.
+ }
+ }
+
- assert(Subtarget.hasAVX512() &&
- "Cannot lower 512-bit vectors w/o basic ISA!");
MVT ExtVT;
switch (VT.SimpleTy) {
default:
@@ -15069,6 +16079,14 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
canWidenShuffleElements(ZeroableMask, WidenedMask)) {
+ // Shuffle mask widening should not interfere with a broadcast opportunity
+ // by obfuscating the operands with bitcasts.
+ // TODO: Avoid lowering directly from this top-level function: make this
+ // a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
+ if (SDValue Broadcast =
+ lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return Broadcast;
+
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
: MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
@@ -15135,34 +16153,27 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
- if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
- return SDValue();
- auto *CondBV = cast<BuildVectorSDNode>(Cond);
-
  // Only non-legal VSELECTs reach this lowering; convert those into generic
// shuffles and re-use the shuffle lowering path for blends.
SmallVector<int, 32> Mask;
- for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) {
- SDValue CondElt = CondBV->getOperand(i);
- int M = i;
- // We can't map undef to undef here. They have different meanings. Treat
- // as the same as zero.
- if (CondElt.isUndef() || isNullConstant(CondElt))
- M += Size;
- Mask.push_back(M);
- }
- return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask);
+ if (createShuffleMaskFromVSELECT(Mask, Cond))
+ return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask);
+
+ return SDValue();
}
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+
// A vselect where all conditions and data are constants can be optimized into
// a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(0).getNode()) &&
- ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(1).getNode()) &&
- ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
+ if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
return SDValue();
// Try to lower this to a blend-style vector shuffle. This can handle all
@@ -15172,7 +16183,9 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
  // If this VSELECT has a vector of i1 as a mask, it will be directly matched
// with patterns on the mask registers on AVX-512.
- if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
+ MVT CondVT = Cond.getSimpleValueType();
+ unsigned CondEltSize = Cond.getScalarValueSizeInBits();
+ if (CondEltSize == 1)
return Op;
// Variable blends are only legal from SSE4.1 onward.
@@ -15181,24 +16194,32 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned NumElts = VT.getVectorNumElements();
// If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition
// into an i1 condition so that we can use the mask-based 512-bit blend
// instructions.
if (VT.getSizeInBits() == 512) {
- SDValue Cond = Op.getOperand(0);
- // The vNi1 condition case should be handled above as it can be trivially
- // lowered.
- assert(Cond.getValueType().getScalarSizeInBits() ==
- VT.getScalarSizeInBits() &&
- "Should have a size-matched integer condition!");
// Build a mask by testing the condition against zero.
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond,
- getZeroVector(VT, Subtarget, DAG, dl),
+ DAG.getConstant(0, dl, CondVT),
ISD::SETNE);
// Now return a new VSELECT using the mask.
- return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2));
+ return DAG.getSelect(dl, VT, Mask, LHS, RHS);
+ }
+
+ // SEXT/TRUNC cases where the mask doesn't match the destination size.
+ if (CondEltSize != EltSize) {
+ // If we don't have a sign splat, rely on the expansion.
+ if (CondEltSize != DAG.ComputeNumSignBits(Cond))
+ return SDValue();
+
+ MVT NewCondSVT = MVT::getIntegerVT(EltSize);
+ MVT NewCondVT = MVT::getVectorVT(NewCondSVT, NumElts);
+ Cond = DAG.getSExtOrTrunc(Cond, dl, NewCondVT);
+ return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS);
}
// Only some types will be legal on some subtargets. If we can emit a legal
@@ -15219,10 +16240,10 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
case MVT::v8i16:
case MVT::v16i16: {
// Bitcast everything to the vXi8 type and use a vXi8 vselect.
- MVT CastVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
- SDValue Cond = DAG.getBitcast(CastVT, Op->getOperand(0));
- SDValue LHS = DAG.getBitcast(CastVT, Op->getOperand(1));
- SDValue RHS = DAG.getBitcast(CastVT, Op->getOperand(2));
+ MVT CastVT = MVT::getVectorVT(MVT::i8, NumElts * 2);
+ Cond = DAG.getBitcast(CastVT, Cond);
+ LHS = DAG.getBitcast(CastVT, LHS);
+ RHS = DAG.getBitcast(CastVT, RHS);
SDValue Select = DAG.getNode(ISD::VSELECT, dl, CastVT, Cond, LHS, RHS);
return DAG.getBitcast(VT, Select);
}
@@ -15298,34 +16319,25 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0) // the operation is legal
+ return Op;
- // If the kshift instructions of the correct width aren't natively supported
- // then we need to promote the vector to the native size to get the correct
- // zeroing behavior.
- if (VecVT.getVectorNumElements() < 16) {
- VecVT = MVT::v16i1;
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
- DAG.getUNDEF(VecVT), Vec,
+ // Extend to natively supported kshift.
+ unsigned NumElems = VecVT.getVectorNumElements();
+ MVT WideVecVT = VecVT;
+ if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
+ WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
+ DAG.getUNDEF(WideVecVT), Vec,
DAG.getIntPtrConstant(0, dl));
}
- // Extracts from element 0 are always allowed.
- if (IdxVal != 0) {
- // Use kshiftr instruction to move to the lower element.
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
- }
-
- // Shrink to v16i1 since that's always legal.
- if (VecVT.getVectorNumElements() > 16) {
- VecVT = MVT::v16i1;
- Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec,
- DAG.getIntPtrConstant(0, dl));
- }
+ // Use kshiftr instruction to move to the lower element.
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
- // Convert to a bitcast+aext/trunc.
- MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements());
- return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+ DAG.getIntPtrConstant(0, dl));
}
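Modelling the rewritten extract with plain integer operations (the mask register viewed as a 16-bit value; the helper name is illustrative): shifting the mask right by the index and reading lane 0 is the same as reading bit IdxVal directly.

    #include <cstdint>

    // KSHIFTR by IdxVal followed by an extract of lane 0, as done above,
    // is equivalent to testing bit IdxVal of the mask.
    static bool extractMaskBit(uint16_t Mask, unsigned IdxVal) {
      return ((Mask >> IdxVal) & 1u) != 0;
    }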
SDValue
@@ -15793,7 +16805,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
- if (isPositionIndependent() && !Subtarget.is64Bit()) {
+ if (OpFlag) {
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
@@ -16173,6 +17185,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
/// Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
+/// TODO: Can this be moved to general expansion code?
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
MVT VT = Op.getSimpleValueType();
@@ -16182,8 +17195,8 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
- // generic ISD nodes haven't. Insert an AND to be safe, it's optimized away
+ // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
+ // ISD::SRA/L nodes haven't. Insert an AND to be safe; it's optimized away
// during isel.
SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits - 1, dl, MVT::i8));
@@ -16193,10 +17206,10 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
- Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
- Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
@@ -16220,6 +17233,56 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({ Lo, Hi }, dl);
}
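For reference, the SHL_PARTS lowering that this hunk rewrites in terms of ISD::FSHL can be modelled on 32-bit halves as below. This is only a sketch: the final CMOV on bit 5 of the amount sits outside the lines shown, and the helper name is made up.

    #include <cstdint>

    // Tentative results: Tmp2 = FSHL(Hi, Lo, Amt), Tmp3 = Lo << (Amt & 31).
    // When bit 5 of Amt is set, the results are swapped and Lo becomes 0.
    static void shl64Parts(uint32_t Lo, uint32_t Hi, unsigned Amt,
                           uint32_t &OutLo, uint32_t &OutHi) {
      unsigned Safe = Amt & 31;
      uint64_t Concat = ((uint64_t)Hi << 32) | Lo;
      uint32_t Tmp2 = (uint32_t)((Concat << Safe) >> 32); // FSHL(Hi, Lo, Amt)
      uint32_t Tmp3 = Lo << Safe;                         // SHL(Lo, SafeShAmt)
      bool AmtGE32 = (Amt & 32) != 0;
      OutLo = AmtGE32 ? 0u : Tmp3;
      OutHi = AmtGE32 ? Tmp3 : Tmp2;
    }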
+static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
+ "Unexpected funnel shift opcode!");
+
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+
+ bool IsFSHR = Op.getOpcode() == ISD::FSHR;
+
+ if (VT.isVector()) {
+ assert(Subtarget.hasVBMI2() && "Expected VBMI2");
+
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+
+ APInt APIntShiftAmt;
+ if (isConstantSplat(Amt, APIntShiftAmt)) {
+ uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
+ return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
+ Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ }
+
+ return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+ Op0, Op1, Amt);
+ }
+
+ assert((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ "Unexpected funnel shift type!");
+
+ // Expand slow SHLD/SHRD cases if we are not optimizing for size.
+ bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ if (!OptForSize && Subtarget.isSHLDSlow())
+ return SDValue();
+
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+
+ // i16 needs the shift amount masked (modulo 16), but i32/i64 have an implicit modulo.
+ if (VT == MVT::i16)
+ Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
+ DAG.getConstant(15, DL, Amt.getValueType()));
+
+ unsigned SHDOp = (IsFSHR ? X86ISD::SHRD : X86ISD::SHLD);
+ return DAG.getNode(SHDOp, DL, VT, Op0, Op1, Amt);
+}
+
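A scalar model of the funnel-shift semantics the new LowerFunnelShift handles, for the i16 case where the AND with 15 above matters (illustrative helper; i32/i64 get the modulo for free from the hardware):

    #include <cstdint>

    // FSHL(A, B, S) on i16: shift the 32-bit concatenation A:B left by
    // S mod 16 and keep the high half. SHLD implements exactly this once
    // the amount has been masked.
    static uint16_t fshl16(uint16_t A, uint16_t B, unsigned S) {
      S &= 15;
      uint32_t Concat = ((uint32_t)A << 16) | B;
      return (uint16_t)((Concat << S) >> 16);
    }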
// Try to use a packed vector operation to handle i64 on 32-bit targets when
// AVX512DQ is enabled.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
@@ -16271,9 +17334,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
// Legal.
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(VT))
return Op;
- if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && Subtarget.is64Bit()) {
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && Subtarget.is64Bit())
return Op;
- }
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
@@ -16331,7 +17393,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
Chain = Result.getValue(1);
SDValue InFlag = Result.getValue(2);
- // FIXME: Currently the FST is flagged to the FILD_FLAG. This
+ // FIXME: Currently the FST is glued to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
@@ -16412,13 +17474,11 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
SDValue Result;
if (Subtarget.hasSSE3()) {
- // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
+ // FIXME: The 'haddpd' instruction may be slower than 'shuffle + addsd'.
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
- SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub);
- SDValue Shuffle = DAG.getVectorShuffle(MVT::v4i32, dl, S2F, S2F, {2,3,0,1});
- Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
- DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
+ SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
+ Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
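The hunk above only changes how the final lane add is formed, but the i64 -> f64 trick it belongs to reduces to: convert each 32-bit half separately and add the pieces. A standalone scalar sketch of that idea, omitting the magic-exponent constants the real lowering uses:

    #include <cstdint>

    static double u64ToF64(uint64_t X) {
      double Lo = (double)(uint32_t)X;                        // exact, < 2^32
      double Hi = (double)(uint32_t)(X >> 32) * 4294967296.0; // exact, * 2^32
      return Hi + Lo;   // the FHADD / shuffle+FADD step above
    }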
@@ -16910,33 +17970,43 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
InVT.getVectorElementType() == MVT::i32) &&
"Unexpected element type");
+ // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
+ if (InVT == MVT::v8i8) {
+ if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+ return SDValue();
+
+ In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
+ MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8));
+ // FIXME: This should be ANY_EXTEND_VECTOR_INREG for ANY_EXTEND input.
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, VT, In);
+ }
+
if (Subtarget.hasInt256())
- return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
+ return Op;
// Optimize vectors in AVX mode:
//
// v8i16 -> v8i32
- // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
+ // Use vpmovzxwd for 4 lower elements v8i16 -> v4i32.
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
// Concat upper and lower parts.
//
// v4i32 -> v4i64
- // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
+ // Use vpmovzxdq for 4 lower elements v4i32 -> v2i64.
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
// Concat upper and lower parts.
//
- SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+
+ SDValue OpLo = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, HalfVT, In);
+
+ SDValue ZeroVec = DAG.getConstant(0, dl, InVT);
SDValue Undef = DAG.getUNDEF(InVT);
bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
- SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
-
- MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements()/2);
-
- OpLo = DAG.getBitcast(HVT, OpLo);
- OpHi = DAG.getBitcast(HVT, OpHi);
+ OpHi = DAG.getBitcast(HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
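The unpack-with-zero step kept for the high half above works because interleaving a lane with a zero lane and reading the pair back as one wider lane is a zero extension on little-endian x86. A small self-contained illustration (the helper name is made up):

    #include <cstdint>
    #include <cstring>

    static uint32_t zextViaUnpack(uint16_t Lane) {
      uint16_t Pair[2] = {Lane, 0};   // punpcklwd with a zero vector
      uint32_t Widened;
      std::memcpy(&Widened, Pair, sizeof(Widened));
      return Widened;                 // equals (uint32_t)Lane on little-endian
    }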
@@ -16965,7 +18035,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
SDLoc DL(Op);
unsigned NumElts = VT.getVectorNumElements();
- // For all vectors, but vXi8 we can just emit a sign_extend a shift. This
+ // For all vectors, but vXi8 we can just emit a sign_extend and a shift. This
// avoids a constant pool load.
if (VT.getVectorElementType() != MVT::i8) {
SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In);
@@ -16995,7 +18065,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
}
SDValue One = DAG.getConstant(1, DL, WideVT);
- SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, DL);
+ SDValue Zero = DAG.getConstant(0, DL, WideVT);
SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero);
@@ -17035,9 +18105,10 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
const X86Subtarget &Subtarget) {
assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) &&
"Unexpected PACK opcode");
+ assert(DstVT.isVector() && "VT not a vector?");
// Requires SSE2 but AVX512 has fast vector truncate.
- if (!Subtarget.hasSSE2() || Subtarget.hasAVX512() || !DstVT.isVector())
+ if (!Subtarget.hasSSE2())
return SDValue();
EVT SrcVT = In.getValueType();
@@ -17203,10 +18274,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
}
// If we have DQI, emit a pattern that will be iseled as vpmovq2m/vpmovd2m.
if (Subtarget.hasDQI())
- return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),
- In, ISD::SETGT);
- return DAG.getSetCC(DL, VT, In, getZeroVector(InVT, Subtarget, DAG, DL),
- ISD::SETNE);
+ return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);
+ return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE);
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
@@ -17219,20 +18288,22 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
+ // If called by the legalizer just return.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
+ return SDValue();
+
if (VT.getVectorElementType() == MVT::i1)
return LowerTruncateVecI1(Op, DAG, Subtarget);
// vpmovqb/w/d, vpmovdb/w, vpmovwb
if (Subtarget.hasAVX512()) {
- // word to byte only under BWI
- if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) { // v16i16 -> v16i8
- // Make sure we're allowed to promote 512-bits.
- if (Subtarget.canExtendTo512DQ())
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
- } else {
+ // word to byte only under BWI. Otherwise we have to promote to v16i32
+ // and then truncate that. But we should only do that if we haven't been
+ // asked to avoid 512-bit vectors. The actual promotion to v16i32 will be
+ // handled by isel patterns.
+ if (InVT != MVT::v16i16 || Subtarget.hasBWI() ||
+ Subtarget.canExtendTo512DQ())
return Op;
- }
}
unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16);
@@ -17241,8 +18312,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// Truncate with PACKUS if we are truncating a vector with leading zero bits
// that extend all the way to the packed/truncated value.
// Pre-SSE41 we can only use PACKUSWB.
- KnownBits Known;
- DAG.computeKnownBits(In, Known);
+ KnownBits Known = DAG.computeKnownBits(In);
if ((InNumEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros())
if (SDValue V =
truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget))
@@ -17320,6 +18390,17 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBitcast(MVT::v8i16, res);
}
+ if (VT == MVT::v16i8 && InVT == MVT::v16i16) {
+ // Use an AND to zero the upper bits for PACKUS.
+ In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(255, DL, InVT));
+
+ SDValue InLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue InHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
+ DAG.getIntPtrConstant(8, DL));
+ return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi);
+ }
+
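The AND with 255 added above is what lets PACKUS act as a plain truncation: PACKUS saturates each signed 16-bit lane into [0, 255], and masked lanes are already in that range, so the saturation never fires. A per-lane sketch:

    #include <cstdint>

    static uint8_t packusLane(int16_t V) {
      if (V < 0)   return 0;     // unsigned saturation, low side
      if (V > 255) return 255;   // unsigned saturation, high side
      return (uint8_t)V;
    }
    // For any uint16_t X: packusLane((int16_t)(X & 0xFF)) == (uint8_t)X.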
// Handle truncation of V256 to V128 using shuffles.
assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");
@@ -17405,6 +18486,98 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
In, DAG.getUNDEF(SVT)));
}
+/// Horizontal vector math instructions may be slower than normal math with
+/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
+/// implementation, and likely shuffle complexity of the alternate sequence.
+static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ bool IsOptimizingSize = DAG.getMachineFunction().getFunction().optForSize();
+ bool HasFastHOps = Subtarget.hasFastHorizontalOps();
+ return !IsSingleSource || IsOptimizingSize || HasFastHOps;
+}
+
+/// Depending on uarch and/or optimizing for size, we might prefer to use a
+/// vector operation in place of the typical scalar operation.
+static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // If both operands have other uses, this is probably not profitable.
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ if (!LHS.hasOneUse() && !RHS.hasOneUse())
+ return Op;
+
+ // FP horizontal add/sub were added with SSE3. Integer with SSSE3.
+ bool IsFP = Op.getSimpleValueType().isFloatingPoint();
+ if (IsFP && !Subtarget.hasSSE3())
+ return Op;
+ if (!IsFP && !Subtarget.hasSSSE3())
+ return Op;
+
+ // Defer forming the minimal horizontal op if the vector source has more than
+ // the 2 extract element uses that we're matching here. In that case, we might
+ // form a horizontal op that includes more than 1 add/sub op.
+ if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ LHS.getOperand(0) != RHS.getOperand(0) ||
+ !LHS.getOperand(0)->hasNUsesOfValue(2, 0))
+ return Op;
+
+ if (!isa<ConstantSDNode>(LHS.getOperand(1)) ||
+ !isa<ConstantSDNode>(RHS.getOperand(1)) ||
+ !shouldUseHorizontalOp(true, DAG, Subtarget))
+ return Op;
+
+ // Allow commuted 'hadd' ops.
+ // TODO: Allow commuted (f)sub by negating the result of (F)HSUB?
+ unsigned HOpcode;
+ switch (Op.getOpcode()) {
+ case ISD::ADD: HOpcode = X86ISD::HADD; break;
+ case ISD::SUB: HOpcode = X86ISD::HSUB; break;
+ case ISD::FADD: HOpcode = X86ISD::FHADD; break;
+ case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
+ default:
+ llvm_unreachable("Trying to lower unsupported opcode to horizontal op");
+ }
+ unsigned LExtIndex = LHS.getConstantOperandVal(1);
+ unsigned RExtIndex = RHS.getConstantOperandVal(1);
+ if (LExtIndex == 1 && RExtIndex == 0 &&
+ (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
+ std::swap(LExtIndex, RExtIndex);
+
+ // TODO: This can be extended to handle other adjacent extract pairs.
+ if (LExtIndex != 0 || RExtIndex != 1)
+ return Op;
+
+ SDValue X = LHS.getOperand(0);
+ EVT VecVT = X.getValueType();
+ unsigned BitWidth = VecVT.getSizeInBits();
+ assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) &&
+ "Not expecting illegal vector widths here");
+
+ // Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit
+ // equivalent, so extract the 256/512-bit source op to 128-bit.
+ // This is free: ymm/zmm -> xmm.
+ SDLoc DL(Op);
+ if (BitWidth == 256 || BitWidth == 512)
+ X = extract128BitVector(X, 0, DAG, DL);
+
+ // add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0
+ // add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0
+ // sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0
+ SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
+ DAG.getIntPtrConstant(0, DL));
+}
+
+/// Depending on uarch and/or optimizing for size, we might prefer to use a
+/// vector operation in place of the typical scalar operation.
+static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
+ "Only expecting float/double");
+ return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
+}
+
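A scalar model of the horizontal-op rewrite these new helpers perform: lane 0 of HADD(X, X) is exactly extract(X, 0) + extract(X, 1). Shown for the SSE3 haddps lane layout; the helper is illustrative only.

    // haddps: Out = <X0+X1, X2+X3, Y0+Y1, Y2+Y3>
    static void haddps(const float X[4], const float Y[4], float Out[4]) {
      Out[0] = X[0] + X[1];
      Out[1] = X[2] + X[3];
      Out[2] = Y[0] + Y[1];
      Out[3] = Y[2] + Y[3];
    }
    // add(extract(X,0), extract(X,1)) == haddps(X, X)[0], which is the
    // extract of lane 0 emitted above.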
/// The only differences between FABS and FNEG are the mask and the logic op.
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
@@ -17424,43 +18597,36 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
bool IsF128 = (VT == MVT::f128);
+ assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
+ VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
+ VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
+ "Unexpected type in LowerFABSorFNEG");
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
- MVT LogicVT;
- MVT EltVT;
-
- if (VT.isVector()) {
- LogicVT = VT;
- EltVT = VT.getVectorElementType();
- } else if (IsF128) {
- // SSE instructions are used for optimized f128 logical operations.
- LogicVT = MVT::f128;
- EltVT = VT;
- } else {
- // There are no scalar bitwise logical SSE/AVX instructions, so we
- // generate a 16-byte vector constant and logic op even for the scalar case.
- // Using a 16-byte mask allows folding the load of the mask with
- // the logic op, so it can save (~4 bytes) on code size.
+ // There are no scalar bitwise logical SSE/AVX instructions, so we
+ // generate a 16-byte vector constant and logic op even for the scalar case.
+ // Using a 16-byte mask allows folding the load of the mask with
+ // the logic op, so it can save (~4 bytes) on code size.
+ bool IsFakeVector = !VT.isVector() && !IsF128;
+ MVT LogicVT = VT;
+ if (IsFakeVector)
LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
- EltVT = VT;
- }
- unsigned EltBits = EltVT.getSizeInBits();
+ unsigned EltBits = VT.getScalarSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
- APInt MaskElt =
- IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits);
- const fltSemantics &Sem =
- EltVT == MVT::f64 ? APFloat::IEEEdouble() :
- (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
+ APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) :
+ APInt::getSignMask(EltBits);
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
- unsigned LogicOp =
- IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
+ unsigned LogicOp = IsFABS ? X86ISD::FAND :
+ IsFNABS ? X86ISD::FOR :
+ X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
if (VT.isVector() || IsF128)
@@ -17496,10 +18662,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
"Unexpected type in LowerFCOPYSIGN");
- MVT EltVT = VT.getScalarType();
- const fltSemantics &Sem =
- EltVT == MVT::f64 ? APFloat::IEEEdouble()
- : (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
// Perform all scalar logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE.
@@ -17516,7 +18679,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue SignMask = DAG.getConstantFP(
APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
SDValue MagMask = DAG.getConstantFP(
- APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
+ APFloat(Sem, APInt::getSignedMaxValue(EltSizeInBits)), dl, LogicVT);
// First, clear all bits but the sign bit from the second operand (sign).
if (IsFakeVector)
@@ -17527,7 +18690,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// TODO: If we had general constant folding for FP logic ops, this check
// wouldn't be necessary.
SDValue MagBits;
- if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Mag)) {
+ if (ConstantFPSDNode *Op0CN = isConstOrConstSplatFP(Mag)) {
APFloat APF = Op0CN->getValueAPF();
APF.clearSign();
MagBits = DAG.getConstantFP(APF, dl, LogicVT);
@@ -17572,7 +18735,8 @@ static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
// Check whether an OR'd tree is PTEST-able.
static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ SDValue &X86CC) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget.hasSSE41())
@@ -17658,9 +18822,10 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
}
- SDValue Res = DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
- VecIns.back(), VecIns.back());
- return getSETCC(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE, Res, DL, DAG);
+ X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE,
+ DL, MVT::i8);
+ return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
+ VecIns.back(), VecIns.back());
}
/// return true if \c Op has a use that doesn't just read flags.
@@ -17684,8 +18849,8 @@ static bool hasNonFlagsUse(SDValue Op) {
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
-SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
- SelectionDAG &DAG) const {
+static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget) {
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;
@@ -17728,159 +18893,26 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
unsigned Opcode = 0;
unsigned NumOperands = 0;
- // Truncate operations may prevent the merge of the SETCC instruction
- // and the arithmetic instruction before it. Attempt to truncate the operands
- // of the arithmetic instruction and use a reduced bit-width instruction.
- bool NeedTruncation = false;
SDValue ArithOp = Op;
- if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
- SDValue Arith = Op->getOperand(0);
- // Both the trunc and the arithmetic op need to have one user each.
- if (Arith->hasOneUse())
- switch (Arith.getOpcode()) {
- default: break;
- case ISD::ADD:
- case ISD::SUB:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR: {
- NeedTruncation = true;
- ArithOp = Arith;
- }
- }
- }
-
- // Sometimes flags can be set either with an AND or with an SRL/SHL
- // instruction. SRL/SHL variant should be preferred for masks longer than this
- // number of bits.
- const int ShiftToAndMaxMaskWidth = 32;
- const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE);
// NOTICE: In the code below we use ArithOp to hold the arithmetic operation
// which may be the result of a CAST. We use the variable 'Op', which is the
// non-casted variable when we check for possible users.
switch (ArithOp.getOpcode()) {
- case ISD::ADD:
- // We only want to rewrite this as a target-specific node with attached
- // flags if there is a reasonable chance of either using that to do custom
- // instructions selection that can fold some of the memory operands, or if
- // only the flags are used. If there are other uses, leave the node alone
- // and emit a test instruction.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg &&
- UI->getOpcode() != ISD::SETCC &&
- UI->getOpcode() != ISD::STORE)
- goto default_case;
-
- if (auto *C = dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) {
- // An add of one will be selected as an INC.
- if (C->isOne() &&
- (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- Opcode = X86ISD::INC;
- NumOperands = 1;
- break;
- }
-
- // An add of negative one (subtract of one) will be selected as a DEC.
- if (C->isAllOnesValue() &&
- (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- Opcode = X86ISD::DEC;
- NumOperands = 1;
- break;
- }
- }
-
- // Otherwise use a regular EFLAGS-setting add.
- Opcode = X86ISD::ADD;
- NumOperands = 2;
- break;
- case ISD::SHL:
- case ISD::SRL:
- // If we have a constant logical shift that's only used in a comparison
- // against zero turn it into an equivalent AND. This allows turning it into
- // a TEST instruction later.
- if (ZeroCheck && Op->hasOneUse() &&
- isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
- EVT VT = Op.getValueType();
- unsigned BitWidth = VT.getSizeInBits();
- unsigned ShAmt = Op->getConstantOperandVal(1);
- if (ShAmt >= BitWidth) // Avoid undefined shifts.
- break;
- APInt Mask = ArithOp.getOpcode() == ISD::SRL
- ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
- : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
- if (!Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
- break;
- Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
- DAG.getConstant(Mask, dl, VT));
- }
- break;
-
case ISD::AND:
// If the primary 'and' result isn't used, don't bother using X86ISD::AND,
- // because a TEST instruction will be better. However, AND should be
- // preferred if the instruction can be combined into ANDN.
- if (!hasNonFlagsUse(Op)) {
- SDValue Op0 = ArithOp->getOperand(0);
- SDValue Op1 = ArithOp->getOperand(1);
- EVT VT = ArithOp.getValueType();
- bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1);
- bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64;
- bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI();
-
- // If we cannot select an ANDN instruction, check if we can replace
- // AND+IMM64 with a shift before giving up. This is possible for masks
- // like 0xFF000000 or 0x00FFFFFF and if we care only about the zero flag.
- if (!isProperAndn) {
- if (!ZeroCheck)
- break;
-
- assert(!isa<ConstantSDNode>(Op0) && "AND node isn't canonicalized");
- auto *CN = dyn_cast<ConstantSDNode>(Op1);
- if (!CN)
- break;
-
- const APInt &Mask = CN->getAPIntValue();
- if (Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
- break; // Prefer TEST instruction.
-
- unsigned BitWidth = Mask.getBitWidth();
- unsigned LeadingOnes = Mask.countLeadingOnes();
- unsigned TrailingZeros = Mask.countTrailingZeros();
-
- if (LeadingOnes + TrailingZeros == BitWidth) {
- assert(TrailingZeros < VT.getSizeInBits() &&
- "Shift amount should be less than the type width");
- MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
- SDValue ShAmt = DAG.getConstant(TrailingZeros, dl, ShTy);
- Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt);
- break;
- }
-
- unsigned LeadingZeros = Mask.countLeadingZeros();
- unsigned TrailingOnes = Mask.countTrailingOnes();
-
- if (LeadingZeros + TrailingOnes == BitWidth) {
- assert(LeadingZeros < VT.getSizeInBits() &&
- "Shift amount should be less than the type width");
- MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
- SDValue ShAmt = DAG.getConstant(LeadingZeros, dl, ShTy);
- Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt);
- break;
- }
+ // because a TEST instruction will be better.
+ if (!hasNonFlagsUse(Op))
+ break;
- break;
- }
- }
LLVM_FALLTHROUGH;
+ case ISD::ADD:
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
- // Similar to ISD::ADD above, check if the uses will preclude useful
- // lowering of the target-specific node.
+ // Transform to an x86-specific ALU node with flags if there is a chance of
+ // using an RMW op or only the flags are used. Otherwise, leave
+ // the node alone and emit a 'test' instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
@@ -17891,6 +18923,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
+ case ISD::ADD: Opcode = X86ISD::ADD; break;
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
@@ -17901,8 +18934,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
break;
case X86ISD::ADD:
case X86ISD::SUB:
- case X86ISD::INC:
- case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
@@ -17912,36 +18943,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
break;
}
- // If we found that truncation is beneficial, perform the truncation and
- // update 'Op'.
- if (NeedTruncation) {
- EVT VT = Op.getValueType();
- SDValue WideVal = Op->getOperand(0);
- EVT WideVT = WideVal.getValueType();
- unsigned ConvertedOp = 0;
- // Use a target machine opcode to prevent further DAGCombine
- // optimizations that may separate the arithmetic operations
- // from the setcc node.
- switch (WideVal.getOpcode()) {
- default: break;
- case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
- case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
- case ISD::AND: ConvertedOp = X86ISD::AND; break;
- case ISD::OR: ConvertedOp = X86ISD::OR; break;
- case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
- }
-
- if (ConvertedOp) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
- SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
- SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- Op = DAG.getNode(ConvertedOp, dl, VTs, V0, V1);
- }
- }
- }
-
if (Opcode == 0) {
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
@@ -17960,17 +18961,17 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
const SDLoc &dl, SelectionDAG &DAG) const {
if (isNullConstant(Op1))
- return EmitTest(Op0, X86CC, dl, DAG);
-
- assert(!(isa<ConstantSDNode>(Op1) && Op0.getValueType() == MVT::i1) &&
- "Unexpected comparison operation for MVT::i1 operands");
+ return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
// Only promote the compare up to I32 if it is a 16 bit operation
// with an immediate. 16 bit immediates are to be avoided.
- if ((Op0.getValueType() == MVT::i16 &&
- (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1))) &&
+ if (Op0.getValueType() == MVT::i16 &&
+ ((isa<ConstantSDNode>(Op0) &&
+ !cast<ConstantSDNode>(Op0)->getAPIntValue().isSignedIntN(8)) ||
+ (isa<ConstantSDNode>(Op1) &&
+ !cast<ConstantSDNode>(Op1)->getAPIntValue().isSignedIntN(8))) &&
!DAG.getMachineFunction().getFunction().optForMinSize() &&
!Subtarget.isAtom()) {
unsigned ExtendOp =
@@ -17983,6 +18984,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
return SDValue(Sub.getNode(), 1);
}
+ assert(Op0.getValueType().isFloatingPoint() && "Unexpected VT!");
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
@@ -18103,39 +19105,11 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
return 2;
}
-/// Create a BT (Bit Test) node - Test bit \p BitNo in \p Src and set condition
-/// according to equal/not-equal condition code \p CC.
-static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG) {
- // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
- // instruction. Since the shift amount is in-range-or-undefined, we know
- // that doing a bittest on the i32 value is ok. We extend to i32 because
- // the encoding for the i16 version is larger than the i32 version.
- // Also promote i16 to i32 for performance / code size reason.
- if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
- Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
-
- // See if we can use the 32-bit instruction instead of the 64-bit one for a
- // shorter encoding. Since the former takes the modulo 32 of BitNo and the
- // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
- // known to be zero.
- if (Src.getValueType() == MVT::i64 &&
- DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
- Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
-
- // If the operand types disagree, extend the shift amount to match. Since
- // BT ignores high bits (like shifts) we can use anyextend.
- if (Src.getValueType() != BitNo.getValueType())
- BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
-
- SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
- X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
- return getSETCC(Cond, BT, dl , DAG);
-}
-
/// Result of 'and' is compared against zero. Change to a BT node if possible.
+/// Returns the BT node and the condition code needed to use it.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG) {
+ const SDLoc &dl, SelectionDAG &DAG,
+ SDValue &X86CC) {
assert(And.getOpcode() == ISD::AND && "Expected AND node!");
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
@@ -18144,7 +19118,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
if (Op1.getOpcode() == ISD::TRUNCATE)
Op1 = Op1.getOperand(0);
- SDValue LHS, RHS;
+ SDValue Src, BitNo;
if (Op1.getOpcode() == ISD::SHL)
std::swap(Op0, Op1);
if (Op0.getOpcode() == ISD::SHL) {
@@ -18154,13 +19128,12 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
- KnownBits Known;
- DAG.computeKnownBits(Op0, Known);
+ KnownBits Known = DAG.computeKnownBits(Op0);
if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
return SDValue();
}
- LHS = Op1;
- RHS = Op0.getOperand(1);
+ Src = Op1;
+ BitNo = Op0.getOperand(1);
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
@@ -18168,24 +19141,49 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
SDValue AndLHS = Op0;
if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
- LHS = AndLHS.getOperand(0);
- RHS = AndLHS.getOperand(1);
+ Src = AndLHS.getOperand(0);
+ BitNo = AndLHS.getOperand(1);
} else {
// Use BT if the immediate can't be encoded in a TEST instruction or we
// are optimizing for size and the immediate won't fit in a byte.
bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
isPowerOf2_64(AndRHSVal)) {
- LHS = AndLHS;
- RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, LHS.getValueType());
+ Src = AndLHS;
+ BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl,
+ Src.getValueType());
}
}
}
- if (LHS.getNode())
- return getBitTestCondition(LHS, RHS, CC, dl, DAG);
+ // No patterns found, give up.
+ if (!Src.getNode())
+ return SDValue();
- return SDValue();
+ // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ // Also promote i16 to i32 for performance / code size reasons.
+ if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
+ Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
+
+ // See if we can use the 32-bit instruction instead of the 64-bit one for a
+ // shorter encoding. Since the former takes the modulo 32 of BitNo and the
+ // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
+ // known to be zero.
+ if (Src.getValueType() == MVT::i64 &&
+ DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
+ Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (Src.getValueType() != BitNo.getValueType())
+ BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
+
+ X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
+ dl, MVT::i8);
+ return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
}
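As a plain-integer model of what the BT node produced above computes, with the modulo behaviour the comments describe (illustrative helper name):

    #include <cstdint>

    // BT r32: the bit index is taken modulo 32, so "(X & (1 << N)) != 0"
    // and a direct bit test agree for any in-range-or-undefined N.
    static bool testBit32(uint32_t X, unsigned N) {
      return ((X >> (N & 31)) & 1u) != 0;
    }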
/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
@@ -18292,34 +19290,32 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode);
}
-/// Try to turn a VSETULT into a VSETULE by modifying its second
-/// operand \p Op1. If non-trivial (for example because it's not constant)
-/// return an empty value.
-static SDValue ChangeVSETULTtoVSETULE(const SDLoc &dl, SDValue Op1,
- SelectionDAG &DAG) {
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1.getNode());
+/// Given a simple buildvector constant, return a new vector constant with each
+/// element decremented. If decrementing would result in underflow or this
+/// is not a simple vector constant, return an empty value.
+static SDValue decrementVectorConstant(SDValue V, SelectionDAG &DAG) {
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
if (!BV)
return SDValue();
- MVT VT = Op1.getSimpleValueType();
- MVT EVT = VT.getVectorElementType();
- unsigned n = VT.getVectorNumElements();
- SmallVector<SDValue, 8> ULTOp1;
-
- for (unsigned i = 0; i < n; ++i) {
- ConstantSDNode *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
- if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT)
+ MVT VT = V.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> NewVecC;
+ SDLoc DL(V);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT)
return SDValue();
// Avoid underflow.
- APInt Val = Elt->getAPIntValue();
- if (Val == 0)
+ if (Elt->getAPIntValue().isNullValue())
return SDValue();
- ULTOp1.push_back(DAG.getConstant(Val - 1, dl, EVT));
+ NewVecC.push_back(DAG.getConstant(Elt->getAPIntValue() - 1, DL, EltVT));
}
- return DAG.getBuildVector(VT, dl, ULTOp1);
+ return DAG.getBuildVector(VT, DL, NewVecC);
}
/// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
@@ -18348,7 +19344,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget.hasAVX())
return SDValue();
- SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG);
+ SDValue ULEOp1 = decrementVectorConstant(Op1, DAG);
if (!ULEOp1)
return SDValue();
Op1 = ULEOp1;
@@ -18362,9 +19358,9 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
break;
}
- SDValue Result = DAG.getNode(X86ISD::SUBUS, dl, VT, Op0, Op1);
+ SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);
return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
- getZeroVector(VT, Subtarget, DAG, dl));
+ DAG.getConstant(0, dl, VT));
}
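The USUBSAT + PCMPEQ pattern emitted here rests on a simple identity for unsigned lanes: x <= y exactly when the saturating subtraction x - y bottoms out at zero. A per-lane sketch:

    #include <cstdint>

    static bool uleViaUsubsat(uint8_t X, uint8_t Y) {
      uint8_t Sat = X > Y ? (uint8_t)(X - Y) : 0;  // USUBSAT
      return Sat == 0;                             // PCMPEQ against zero
    }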
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
@@ -18527,13 +19523,26 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
!(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));
- // Special case: Use min/max operations for unsigned compares. We only want
- // to do this for unsigned compares if we need to flip signs or if it allows
- // use to avoid an invert.
+ // Special case: Use min/max operations for unsigned compares.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (ISD::isUnsignedIntSetCC(Cond) &&
(FlipSigns || ISD::isTrueWhenEqual(Cond)) &&
TLI.isOperationLegal(ISD::UMIN, VT)) {
+ // If we have a constant operand, increment/decrement it and change the
+ // condition to avoid an invert.
+ // TODO: This could be extended to handle a non-splat constant by checking
+ // that each element of the constant is not the max/null value.
+ APInt C;
+ if (Cond == ISD::SETUGT && isConstantSplat(Op1, C) && !C.isMaxValue()) {
+ // X > C --> X >= (C+1) --> X == umax(X, C+1)
+ Op1 = DAG.getConstant(C + 1, dl, VT);
+ Cond = ISD::SETUGE;
+ }
+ if (Cond == ISD::SETULT && isConstantSplat(Op1, C) && !C.isNullValue()) {
+ // X < C --> X <= (C-1) --> X == umin(X, C-1)
+ Op1 = DAG.getConstant(C - 1, dl, VT);
+ Cond = ISD::SETULE;
+ }
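The constant adjustments added above rely on the usual unsigned identities, valid because the boundary constants are excluded: X > C <=> X >= C+1 <=> X == umax(X, C+1), and X < C <=> X <= C-1 <=> X == umin(X, C-1). A scalar sketch with illustrative helpers:

    #include <algorithm>
    #include <cstdint>

    static bool ugtViaUmax(uint32_t X, uint32_t C) { // requires C != UINT32_MAX
      return X == std::max(X, C + 1);
    }
    static bool ultViaUmin(uint32_t X, uint32_t C) { // requires C != 0
      return X == std::min(X, C - 1);
    }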
bool Invert = false;
unsigned Opc;
switch (Cond) {
@@ -18577,23 +19586,21 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
assert(Subtarget.hasSSE2() && "Don't know how to lower!");
- // First cast everything to the right type.
- Op0 = DAG.getBitcast(MVT::v4i32, Op0);
- Op1 = DAG.getBitcast(MVT::v4i32, Op1);
-
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
SDValue SB;
if (FlipSigns) {
- SB = DAG.getConstant(0x80000000U, dl, MVT::v4i32);
+ SB = DAG.getConstant(0x8000000080000000ULL, dl, MVT::v2i64);
} else {
- SDValue Sign = DAG.getConstant(0x80000000U, dl, MVT::i32);
- SDValue Zero = DAG.getConstant(0x00000000U, dl, MVT::i32);
- SB = DAG.getBuildVector(MVT::v4i32, dl, {Sign, Zero, Sign, Zero});
+ SB = DAG.getConstant(0x0000000080000000ULL, dl, MVT::v2i64);
}
- Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB);
- Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB);
+ Op0 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op0, SB);
+ Op1 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op1, SB);
+
+ // Cast everything to the right type.
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
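The PCMPGTQ emulation this hunk reorders compares 64-bit lanes through their 32-bit halves; after the sign-bit flips above, the low compare behaves as unsigned. A scalar model of the formula in the comment (assumes arithmetic right shift for negative values, as on mainstream compilers):

    #include <cstdint>

    // (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2)), low compare unsigned.
    static bool sgt64Via32(int64_t A, int64_t B) {
      int32_t  AHi = (int32_t)(A >> 32), BHi = (int32_t)(B >> 32);
      uint32_t ALo = (uint32_t)A,        BLo = (uint32_t)B;
      return AHi > BHi || (AHi == BHi && ALo > BLo);
    }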
@@ -18658,10 +19665,11 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
return Result;
}
-// Try to select this as a KTEST+SETCC if possible.
-static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+// Try to select this as a KORTEST+SETCC if possible.
+static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ SDValue &X86CC) {
// Only support equality comparisons.
if (CC != ISD::SETEQ && CC != ISD::SETNE)
return SDValue();
@@ -18677,12 +19685,12 @@ static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
!(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
return SDValue();
- X86::CondCode X86CC;
+ X86::CondCode X86Cond;
if (isNullConstant(Op1)) {
- X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
+ X86Cond = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
} else if (isAllOnesConstant(Op1)) {
// C flag is set for all ones.
- X86CC = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
+ X86Cond = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
} else
return SDValue();
@@ -18694,70 +19702,87 @@ static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
RHS = Op0.getOperand(1);
}
- SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
- return getSETCC(X86CC, KORTEST, dl, DAG);
+ X86CC = DAG.getConstant(X86Cond, dl, MVT::i8);
+ return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
}
-SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-
- MVT VT = Op.getSimpleValueType();
-
- if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
-
- assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDLoc dl(Op);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
+/// Emit flags for the given setcc condition and operands. Also returns the
+/// corresponding X86 condition code constant in X86CC.
+SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
+ ISD::CondCode CC, const SDLoc &dl,
+ SelectionDAG &DAG,
+ SDValue &X86CC) const {
// Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (SDValue NewSetCC = LowerAndToBT(Op0, CC, dl, DAG))
- return NewSetCC;
+ if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC))
+ return BT;
}
// Try to use PTEST when a tree of ORs is compared for equality with 0.
// TODO: We could do AND tree with all 1s as well by using the C flag.
if (Op0.getOpcode() == ISD::OR && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (SDValue NewSetCC = LowerVectorAllZeroTest(Op0, CC, Subtarget, DAG))
- return NewSetCC;
+ if (SDValue PTEST = LowerVectorAllZeroTest(Op0, CC, Subtarget, DAG, X86CC))
+ return PTEST;
}
- // Try to lower using KTEST.
- if (SDValue NewSetCC = EmitKTEST(Op0, Op1, CC, dl, DAG, Subtarget))
- return NewSetCC;
+ // Try to lower using KORTEST.
+ if (SDValue KORTEST = EmitKORTEST(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
+ return KORTEST;
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
-
// If the input is a setcc, then reuse the input setcc or use a new one with
// the inverted condition.
if (Op0.getOpcode() == X86ISD::SETCC) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
- if (!Invert)
- return Op0;
- CCode = X86::GetOppositeBranchCondition(CCode);
- return getSETCC(CCode, Op0.getOperand(1), dl, DAG);
+ X86CC = Op0.getOperand(0);
+ if (Invert) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ X86CC = DAG.getConstant(CCode, dl, MVT::i8);
+ }
+
+ return Op0.getOperand(1);
}
}
bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
- X86::CondCode X86CC = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
- if (X86CC == X86::COND_INVALID)
+ X86::CondCode CondCode = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
+ if (CondCode == X86::COND_INVALID)
return SDValue();
- SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG);
+ SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- return getSETCC(X86CC, EFLAGS, dl, DAG);
+ X86CC = DAG.getConstant(CondCode, dl, MVT::i8);
+ return EFLAGS;
+}
+
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ MVT VT = Op.getSimpleValueType();
+
+ if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
+
+ assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDLoc dl(Op);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ SDValue X86CC;
+ SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
+ if (!EFLAGS)
+ return SDValue();
+
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
}
SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
@@ -18781,6 +19806,70 @@ SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const
return getSETCC(CC, Cmp.getValue(1), DL, DAG);
}
+// This function returns three things: the arithmetic computation itself
+// (Value), an EFLAGS result (Overflow), and a condition code (Cond). The
+// flag and the condition code define the case in which the arithmetic
+// computation overflows.
+static std::pair<SDValue, SDValue>
+getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getResNo() == 0 && "Unexpected result number!");
+ SDValue Value, Overflow;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ unsigned BaseOp = 0;
+ SDLoc DL(Op);
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown ovf instruction!");
+ case ISD::SADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SSUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_O;
+ break;
+ case ISD::USUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SMULO:
+ BaseOp = X86ISD::SMUL;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UMULO:
+ BaseOp = X86ISD::UMUL;
+ Cond = X86::COND_O;
+ break;
+ }
+
+ if (BaseOp) {
+ // Also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ Value = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
+ Overflow = Value.getValue(1);
+ }
+
+ return std::make_pair(Value, Overflow);
+}
+
+static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+ // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+ // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
+ // looks for this combo and may remove the "setcc" instruction if the "setcc"
+ // has only one use.
+ SDLoc DL(Op);
+ X86::CondCode Cond;
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getX86XALUOOp(Cond, Op, DAG);
+
+ SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG);
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC);
+}
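The Value/Overflow pairing these new helpers return mirrors how the flag is derived from the arithmetic itself; for the unsigned-add case (X86::COND_B) a scalar model is simply:

    #include <cstdint>

    // Returns the sum (the Value) and reports the carry (the Overflow /
    // COND_B condition) for an unsigned add.
    static bool uaddOverflows(uint32_t A, uint32_t B, uint32_t &Out) {
      Out = A + B;      // wraps modulo 2^32
      return Out < A;   // carry set exactly when the true sum doesn't fit
    }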
+
/// Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getOpcode();
@@ -18789,12 +19878,8 @@ static bool isX86LogicalCmp(SDValue Op) {
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
- Opc == X86ISD::SBB || Opc == X86ISD::SMUL ||
- Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR ||
- Opc == X86ISD::XOR || Opc == X86ISD::AND))
- return true;
-
- if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+ Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
+ Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND))
return true;
return false;
@@ -18845,7 +19930,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// of 3 logic instructions for size savings and potentially speed.
// Unfortunately, there is no scalar form of VBLENDV.
- // If either operand is a constant, don't try this. We can expect to
+ // If either operand is a +0.0 constant, don't try this. We can expect to
// optimize away at least one of the logic instructions later in that
// case, so that sequence would be faster than a variable blend.
@@ -18853,13 +19938,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// uses XMM0 as the selection register. That may need just as many
// instructions as the AND/ANDN/OR sequence due to register moves, so
// don't bother.
-
- if (Subtarget.hasAVX() &&
- !isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
-
+ if (Subtarget.hasAVX() && !isNullFPConstant(Op1) &&
+ !isNullFPConstant(Op2)) {
// Convert to vectors, do a VSELECT, and convert back to scalar.
// All of the conversions should be optimized away.
-
MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
@@ -18919,16 +20001,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (VT == MVT::v4i1 || VT == MVT::v2i1) {
- SDValue zeroConst = DAG.getIntPtrConstant(0, DL);
- Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
- DAG.getUNDEF(MVT::v8i1), Op1, zeroConst);
- Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
- DAG.getUNDEF(MVT::v8i1), Op2, zeroConst);
- SDValue newSelect = DAG.getSelect(DL, MVT::v8i1, Cond, Op1, Op2);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
- }
-
if (Cond.getOpcode() == ISD::SETCC) {
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
Cond = NewCond;
@@ -18963,22 +20035,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x == 0), 0, -1) -> neg & sbb
if (isNullConstant(Y) &&
(isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
- SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0);
- SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- SDValue(Neg.getNode(), 1));
- return Res;
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ Zero = DAG.getConstant(0, DL, Op.getValueType());
+ return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
}
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SDValue Zero = DAG.getConstant(0, DL, Op.getValueType());
SDValue Res = // Res = 0 or -1.
- DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
+ DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
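For reference, the neg/sbb idiom the rewritten select lowering relies on can be spelled out for one scalar value. This is only a sketch of the flag arithmetic, with an illustrative helper name:

#include <cstdint>

// CMP 0, X sets the carry flag exactly when X is non-zero; SBB of zero
// against itself then smears that single borrow across the register,
// yielding 0 for X == 0 and -1 otherwise (optionally NOTed afterwards).
int32_t zeroOrAllOnes(uint32_t X) {
  uint32_t Borrow = (X != 0) ? 1u : 0u; // CF after CMP 0, X
  return (int32_t)(0u - 0u - Borrow);   // SBB: 0 - 0 - CF
}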
@@ -19055,34 +20126,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
} else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
- ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
- Cond.getOperand(0).getValueType() != MVT::i8)) {
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- unsigned X86Opcode;
- unsigned X86Cond;
- SDVTList VTs;
- switch (CondOpcode) {
- case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
- case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
- case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
- case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
- case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
- case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
- default: llvm_unreachable("unexpected overflowing operator");
- }
- if (CondOpcode == ISD::UMULO)
- VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
- MVT::i32);
- else
- VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
-
- SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
-
- if (CondOpcode == ISD::UMULO)
- Cond = X86Op.getValue(2);
- else
- Cond = X86Op.getValue(1);
+ CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
+ SDValue Value;
+ X86::CondCode X86Cond;
+ std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
CC = DAG.getConstant(X86Cond, DL, MVT::i8);
AddTest = false;
@@ -19096,9 +20143,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- if (SDValue NewSetCC = LowerAndToBT(Cond, ISD::SETNE, DL, DAG)) {
- CC = NewSetCC.getOperand(0);
- Cond = NewSetCC.getOperand(1);
+ SDValue BTCC;
+ if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) {
+ CC = BTCC;
+ Cond = BT;
AddTest = false;
}
}
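The AND-compared-against-zero shape that LowerAndToBT looks for is simply a single-bit test. A scalar sketch, with an illustrative helper name:

#include <cstdint>

// An AND with a power-of-two mask compared against zero is a bit test and
// maps onto BT plus a condition code, which is what BT/BTCC carry back here.
bool hasBitSet(uint32_t X, unsigned BitNo) {
  return (X & (1u << BitNo)) != 0; // assumes BitNo < 32
}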
@@ -19106,7 +20154,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (AddTest) {
CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8);
- Cond = EmitTest(Cond, X86::COND_NE, DL, DAG);
+ Cond = EmitCmp(Cond, DAG.getConstant(0, DL, Cond.getValueType()),
+ X86::COND_NE, DL, DAG);
}
// a < b ? -1 : 0 -> RES = ~setcc_carry
@@ -19171,12 +20220,12 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
unsigned NumElts = VT.getVectorNumElements();
- // Extend VT if the scalar type is v8/v16 and BWI is not supported.
+ // Extend VT if the scalar type is i8/i16 and BWI is not supported.
MVT ExtVT = VT;
if (!Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16) {
// If v16i32 is to be avoided, we'll need to split and concatenate.
if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
- return SplitAndExtendv16i1(ISD::SIGN_EXTEND, VT, In, dl, DAG);
+ return SplitAndExtendv16i1(Op.getOpcode(), VT, In, dl, DAG);
ExtVT = MVT::getVectorVT(MVT::i32, NumElts);
}
@@ -19195,10 +20244,10 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
MVT WideEltVT = WideVT.getVectorElementType();
if ((Subtarget.hasDQI() && WideEltVT.getSizeInBits() >= 32) ||
(Subtarget.hasBWI() && WideEltVT.getSizeInBits() <= 16)) {
- V = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, In);
+ V = DAG.getNode(Op.getOpcode(), dl, WideVT, In);
} else {
- SDValue NegOne = getOnesVector(WideVT, DAG, dl);
- SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, dl);
+ SDValue NegOne = DAG.getConstant(-1, dl, WideVT);
+ SDValue Zero = DAG.getConstant(0, dl, WideVT);
V = DAG.getSelect(dl, WideVT, In, NegOne, Zero);
}
@@ -19238,7 +20287,6 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
SDValue In = Op->getOperand(0);
MVT VT = Op->getSimpleValueType(0);
MVT InVT = In.getSimpleValueType();
- assert(VT.getSizeInBits() == InVT.getSizeInBits());
MVT SVT = VT.getVectorElementType();
MVT InSVT = InVT.getVectorElementType();
@@ -19249,70 +20297,100 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
- !(VT.is256BitVector() && Subtarget.hasInt256()) &&
+ !(VT.is256BitVector() && Subtarget.hasAVX()) &&
!(VT.is512BitVector() && Subtarget.hasAVX512()))
return SDValue();
SDLoc dl(Op);
+ unsigned Opc = Op.getOpcode();
+ unsigned NumElts = VT.getVectorNumElements();
// For 256-bit vectors, we only need the lower (128-bit) half of the input.
// For 512-bit vectors, we need 128-bits or 256-bits.
- if (VT.getSizeInBits() > 128) {
+ if (InVT.getSizeInBits() > 128) {
// Input needs to be at least the same number of elements as output, and
// at least 128-bits.
- int InSize = InSVT.getSizeInBits() * VT.getVectorNumElements();
+ int InSize = InSVT.getSizeInBits() * NumElts;
In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128));
+ InVT = In.getSimpleValueType();
}
- assert((Op.getOpcode() != ISD::ZERO_EXTEND_VECTOR_INREG ||
- InVT == MVT::v64i8) && "Zero extend only for v64i8 input!");
-
- // SSE41 targets can use the pmovsx* instructions directly for 128-bit results,
+ // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results,
// so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* instructions still
// need to be handled here for 256/512-bit results.
if (Subtarget.hasInt256()) {
assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
- unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
- X86ISD::VSEXT : X86ISD::VZEXT;
+
+ if (InVT.getVectorNumElements() != NumElts)
+ return DAG.getNode(Op.getOpcode(), dl, VT, In);
+
+ // FIXME: Apparently we create inreg operations that could be regular
+ // extends.
+ unsigned ExtOpc =
+ Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, VT, In);
}
+ // pre-AVX2 256-bit extensions need to be split into 128-bit instructions.
+ if (Subtarget.hasAVX()) {
+ assert(VT.is256BitVector() && "256-bit vector expected");
+ int HalfNumElts = NumElts / 2;
+ MVT HalfVT = MVT::getVectorVT(SVT, HalfNumElts);
+
+ unsigned NumSrcElts = InVT.getVectorNumElements();
+ SmallVector<int, 16> HiMask(NumSrcElts, SM_SentinelUndef);
+ for (int i = 0; i != HalfNumElts; ++i)
+ HiMask[i] = HalfNumElts + i;
+
+ SDValue Lo = DAG.getNode(Opc, dl, HalfVT, In);
+ SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, DAG.getUNDEF(InVT), HiMask);
+ Hi = DAG.getNode(Opc, dl, HalfVT, Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ }
+
// We should only get here for sign extend.
- assert(Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
- "Unexpected opcode!");
+ assert(Opc == ISD::SIGN_EXTEND_VECTOR_INREG && "Unexpected opcode!");
+ assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs");
// pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
SDValue Curr = In;
- MVT CurrVT = InVT;
+ SDValue SignExt = Curr;
// As SRAI is only available on i16/i32 types, we expand only up to i32
// and handle i64 separately.
- while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) {
- Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
- MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
- CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
- Curr = DAG.getBitcast(CurrVT, Curr);
- }
+ if (InVT != MVT::v4i32) {
+ MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
- SDValue SignExt = Curr;
- if (CurrVT != InVT) {
- unsigned SignExtShift =
- CurrVT.getScalarSizeInBits() - InSVT.getSizeInBits();
- SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
+ unsigned DestWidth = DestVT.getScalarSizeInBits();
+ unsigned Scale = DestWidth / InSVT.getSizeInBits();
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned DestElts = DestVT.getVectorNumElements();
+
+ // Build a shuffle mask that takes each input element and places it in the
+ // MSBs of the new element size.
+ SmallVector<int, 16> Mask(InNumElts, SM_SentinelUndef);
+ for (unsigned i = 0; i != DestElts; ++i)
+ Mask[i * Scale + (Scale - 1)] = i;
+
+ Curr = DAG.getVectorShuffle(InVT, dl, In, In, Mask);
+ Curr = DAG.getBitcast(DestVT, Curr);
+
+ unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
+ SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
DAG.getConstant(SignExtShift, dl, MVT::i8));
}
- if (CurrVT == VT)
- return SignExt;
-
- if (VT == MVT::v2i64 && CurrVT == MVT::v4i32) {
- SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
- DAG.getConstant(31, dl, MVT::i8));
- SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
- return DAG.getBitcast(VT, Ext);
+ if (VT == MVT::v2i64) {
+ assert(Curr.getValueType() == MVT::v4i32 && "Unexpected input VT");
+ SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
+ SDValue Sign = DAG.getSetCC(dl, MVT::v4i32, Zero, Curr, ISD::SETGT);
+ SignExt = DAG.getVectorShuffle(MVT::v4i32, dl, SignExt, Sign, {0, 4, 1, 5});
+ SignExt = DAG.getBitcast(VT, SignExt);
}
- return SDValue();
+ return SignExt;
}
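The shuffle-into-MSBs followed by VSRAI used above is the classic shift-based sign extension. One lane of it, written as a scalar sketch with an illustrative helper name:

#include <cstdint>

// Place the narrow value in the most-significant bits of the wider lane,
// then arithmetic-shift it back down so the sign bit is replicated; this is
// what the shuffle + VSRAI pair does per element. Relies on the usual
// two's-complement arithmetic right shift.
int32_t signExtend8To32(uint8_t In) {
  uint32_t Shifted = (uint32_t)In << 24; // value now sits in the MSBs
  return (int32_t)Shifted >> 24;         // arithmetic shift restores the sign
}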
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
@@ -19337,38 +20415,40 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
InVT.getVectorElementType() == MVT::i32) &&
"Unexpected element type");
+ // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
+ if (InVT == MVT::v8i8) {
+ if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+ return SDValue();
+
+ In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
+ MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8));
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, VT, In);
+ }
+
if (Subtarget.hasInt256())
- return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+ return Op;
// Optimize vectors in AVX mode
// Sign extend v8i16 to v8i32 and
// v4i32 to v4i64
//
// Divide input vector into two parts
- // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
+ // for v4i32 the high shuffle mask will be {2, 3, -1, -1}
// use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
// concat the vectors to original VT
- unsigned NumElems = InVT.getVectorNumElements();
- SDValue Undef = DAG.getUNDEF(InVT);
-
- SmallVector<int,8> ShufMask1(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask1[i] = i;
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
- SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask1);
+ SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);
- SmallVector<int,8> ShufMask2(NumElems, -1);
+ unsigned NumElems = InVT.getVectorNumElements();
+ SmallVector<int,8> ShufMask(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask2[i] = i + NumElems/2;
+ ShufMask[i] = i + NumElems/2;
- SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask2);
-
- MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
-
- OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT);
- OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT);
+ SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
+ OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
@@ -19379,19 +20459,47 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(St);
SDValue StoredVal = St->getValue();
- // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
- assert(StoredVal.getValueType().isVector() &&
- StoredVal.getValueType().getVectorElementType() == MVT::i1 &&
- StoredVal.getValueType().getVectorNumElements() <= 8 &&
- "Unexpected VT");
- assert(!St->isTruncatingStore() && "Expected non-truncating store");
- assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
- "Expected AVX512F without AVX512DQI");
+ // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
+ if (StoredVal.getValueType().isVector() &&
+ StoredVal.getValueType().getVectorElementType() == MVT::i1) {
+ assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
+ "Unexpected VT");
+ assert(!St->isTruncatingStore() && "Expected non-truncating store");
+ assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
+ "Expected AVX512F without AVX512DQI");
+
+ StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+ DAG.getUNDEF(MVT::v16i1), StoredVal,
+ DAG.getIntPtrConstant(0, dl));
+ StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
+ StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
+
+ return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
+ }
+
+ if (St->isTruncatingStore())
+ return SDValue();
- StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
- DAG.getUNDEF(MVT::v8i1), StoredVal,
+ MVT StoreVT = StoredVal.getSimpleValueType();
+ assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
+ "Unexpected VT");
+ if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
+ TargetLowering::TypeWidenVector)
+ return SDValue();
+
+ // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
+ // and store it.
+ MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
+ StoreVT.getVectorNumElements() * 2);
+ StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
+ DAG.getUNDEF(StoreVT));
+ MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
+ MVT CastVT = MVT::getVectorVT(StVT, 2);
+ StoredVal = DAG.getBitcast(CastVT, StoredVal);
+ StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
DAG.getIntPtrConstant(0, dl));
- StoredVal = DAG.getBitcast(MVT::i8, StoredVal);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
@@ -19400,7 +20508,7 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
// Lower vector extended loads using a shuffle. If SSSE3 is not available we
// may emit an illegal shuffle but the expansion is still better than scalar
-// code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise
+// code. We generate sext/sext_invec for SEXTLOADs if it's available, otherwise
// we'll emit a shuffle and an arithmetic shift.
// we'll emit a shuffle and an arithmetic shift.
// FIXME: Is the expansion actually better than scalar code? It doesn't seem so.
// TODO: It is possible to support ZExt by zeroing the undef values during
@@ -19408,16 +20516,16 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT RegVT = Op.getSimpleValueType();
- assert(RegVT.isVector() && "We only custom lower vector sext loads.");
+ assert(RegVT.isVector() && "We only custom lower vector loads.");
assert(RegVT.isInteger() &&
- "We only custom lower integer vector sext loads.");
+ "We only custom lower integer vector loads.");
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
EVT MemVT = Ld->getMemoryVT();
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
- if (RegVT.isVector() && RegVT.getVectorElementType() == MVT::i1) {
+ if (RegVT.getVectorElementType() == MVT::i1) {
assert(EVT(RegVT) == MemVT && "Expected non-extending load");
assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
@@ -19429,12 +20537,12 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
// Replace chain users with the new chain.
assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewLd.getValue(1));
- SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
- DAG.getBitcast(MVT::v8i1, NewLd),
- DAG.getIntPtrConstant(0, dl));
- return DAG.getMergeValues({Extract, NewLd.getValue(1)}, dl);
+ SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd);
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
+ DAG.getBitcast(MVT::v16i1, Val),
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
}
// Nothing useful we can do without SSE2 shuffles.
@@ -19490,10 +20598,10 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
// Finally, do a normal sign-extend to the desired register.
- return DAG.getSExtOrTrunc(Load, dl, RegVT);
+ SDValue SExt = DAG.getSExtOrTrunc(Load, dl, RegVT);
+ return DAG.getMergeValues({SExt, Load.getValue(1)}, dl);
}
// All sizes must be a power of two.
@@ -19521,26 +20629,26 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
"Can only lower sext loads with a single scalar load!");
- unsigned loadRegZize = RegSz;
+ unsigned loadRegSize = RegSz;
if (Ext == ISD::SEXTLOAD && RegSz >= 256)
- loadRegZize = 128;
+ loadRegSize = 128;
// If we don't have BWI we won't be able to create the shuffle needed for
// v8i8->v8i64.
if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
MemVT == MVT::v8i8)
- loadRegZize = 128;
+ loadRegSize = 128;
// Represent our vector as a sequence of elements which are the
// largest scalar that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(
- *DAG.getContext(), SclrLoadTy, loadRegZize / SclrLoadTy.getSizeInBits());
+ *DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits());
// Represent the data using the same element type that is stored in
// memory. In practice, we "widen" MemVT.
EVT WideVecVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- loadRegZize / MemVT.getScalarSizeInBits());
+ loadRegSize / MemVT.getScalarSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
@@ -19551,15 +20659,20 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
SmallVector<SDValue, 8> Chains;
SDValue Ptr = Ld->getBasePtr();
- SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl,
+ unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8;
+ SDValue Increment = DAG.getConstant(OffsetInc, dl,
TLI.getPointerTy(DAG.getDataLayout()));
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
+ unsigned Offset = 0;
for (unsigned i = 0; i < NumLoads; ++i) {
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset);
+
// Perform a single load.
SDValue ScalarLoad =
- DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
- Ld->getAlignment(), Ld->getMemOperand()->getFlags());
+ DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo().getWithOffset(Offset),
+ NewAlign, Ld->getMemOperand()->getFlags());
Chains.push_back(ScalarLoad.getValue(1));
// Create the first element type using SCALAR_TO_VECTOR in order to avoid
// another round of DAGCombining.
@@ -19570,6 +20683,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
ScalarLoad, DAG.getIntPtrConstant(i, dl));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ Offset += OffsetInc;
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
@@ -19580,28 +20694,14 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
unsigned SizeRatio = RegSz / MemSz;
if (Ext == ISD::SEXTLOAD) {
- // If we have SSE4.1, we can directly emit a VSEXT node.
- if (Subtarget.hasSSE41()) {
- SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, RegVT, SlicedVec, DAG);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Sext;
- }
-
- // Otherwise we'll use SIGN_EXTEND_VECTOR_INREG to sign extend the lowest
- // lanes.
- assert(TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND_VECTOR_INREG, RegVT) &&
- "We can't implement a sext load without SIGN_EXTEND_VECTOR_INREG!");
-
- SDValue Shuff = DAG.getSignExtendVectorInReg(SlicedVec, dl, RegVT);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Shuff;
+ SDValue Sext = getExtendInVec(/*Signed*/true, dl, RegVT, SlicedVec, DAG);
+ return DAG.getMergeValues({Sext, TF}, dl);
}
if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
MemVT == MVT::v8i8) {
- SDValue Sext = getExtendInVec(X86ISD::VZEXT, dl, RegVT, SlicedVec, DAG);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Sext;
+ SDValue Sext = getExtendInVec(/*Signed*/false, dl, RegVT, SlicedVec, DAG);
+ return DAG.getMergeValues({Sext, TF}, dl);
}
// Redistribute the loaded elements into the different locations.
@@ -19614,8 +20714,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
// Bitcast to the requested type.
Shuff = DAG.getBitcast(RegVT, Shuff);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Shuff;
+ return DAG.getMergeValues({Shuff, TF}, dl);
}
/// Return true if node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes
@@ -19712,49 +20811,13 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
CondOpcode = Cond.getOpcode();
if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
- ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
- Cond.getOperand(0).getValueType() != MVT::i8)) {
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- unsigned X86Opcode;
- unsigned X86Cond;
- SDVTList VTs;
- // Keep this in sync with LowerXALUO, otherwise we might create redundant
- // instructions that can't be removed afterwards (i.e. X86ISD::ADD and
- // X86ISD::INC).
- switch (CondOpcode) {
- case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
- case ISD::SADDO:
- if (isOneConstant(RHS)) {
- X86Opcode = X86ISD::INC; X86Cond = X86::COND_O;
- break;
- }
- X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
- case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
- case ISD::SSUBO:
- if (isOneConstant(RHS)) {
- X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O;
- break;
- }
- X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
- case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
- case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
- default: llvm_unreachable("unexpected overflowing operator");
- }
- if (Inverted)
- X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
- if (CondOpcode == ISD::UMULO)
- VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
- MVT::i32);
- else
- VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
-
- SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
+ CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
+ SDValue Value;
+ X86::CondCode X86Cond;
+ std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
- if (CondOpcode == ISD::UMULO)
- Cond = X86Op.getValue(2);
- else
- Cond = X86Op.getValue(1);
+ if (Inverted)
+ X86Cond = X86::GetOppositeBranchCondition(X86Cond);
CC = DAG.getConstant(X86Cond, dl, MVT::i8);
addTest = false;
@@ -19855,34 +20918,17 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
// For FCMP_UNE, we can emit
- // two branches instead of an explicit AND instruction with a
- // separate test. However, we only do this if this block doesn't
- // have a fall-through edge, because this requires an explicit
- // jmp when the condition is false.
- if (Op.getNode()->hasOneUse()) {
- SDNode *User = *Op.getNode()->use_begin();
- // Look for an unconditional branch following this conditional branch.
- // We need this because we need to reverse the successors in order
- // to implement FCMP_UNE.
- if (User->getOpcode() == ISD::BR) {
- SDValue FalseBB = User->getOperand(1);
- SDNode *NewBR =
- DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
- assert(NewBR == User);
- (void)NewBR;
-
- SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
- Cond.getOperand(0), Cond.getOperand(1));
- Cmp = ConvertCmpIfNecessary(Cmp, DAG);
- CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
- Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
- Chain, Dest, CC, Cmp);
- CC = DAG.getConstant(X86::COND_NP, dl, MVT::i8);
- Cond = Cmp;
- addTest = false;
- Dest = FalseBB;
- }
- }
+ // two branches instead of an explicit OR instruction with a
+ // separate test.
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ Cond.getOperand(0), Cond.getOperand(1));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
+ CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = DAG.getConstant(X86::COND_P, dl, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
}
}
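The two-branch FCMP_UNE emission corresponds to checking the not-equal and parity conditions separately after the compare. A scalar reading of that predicate, with an illustrative function name:

#include <cmath>

// After ucomisd, COND_P covers the unordered (NaN) case and COND_NE the
// ordered not-equal case; UNE is taken when either of the two branches is.
bool fcmpUNE(double A, double B) {
  bool Unordered = std::isnan(A) || std::isnan(B); // COND_P branch
  bool NotEqual = !Unordered && (A != B);          // COND_NE branch
  return NotEqual || Unordered;
}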
@@ -19894,9 +20940,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- if (SDValue NewSetCC = LowerAndToBT(Cond, ISD::SETNE, dl, DAG)) {
- CC = NewSetCC.getOperand(0);
- Cond = NewSetCC.getOperand(1);
+ SDValue BTCC;
+ if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, dl, DAG, BTCC)) {
+ CC = BTCC;
+ Cond = BT;
addTest = false;
}
}
@@ -19905,7 +20952,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (addTest) {
X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
CC = DAG.getConstant(X86Cond, dl, MVT::i8);
- Cond = EmitTest(Cond, X86Cond, dl, DAG);
+ Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()),
+ X86Cond, dl, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -20141,6 +21189,25 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// Helper to get immediate/variable SSE shift opcode from other shift opcodes.
+static unsigned getTargetVShiftUniformOpcode(unsigned Opc, bool IsVariable) {
+ switch (Opc) {
+ case ISD::SHL:
+ case X86ISD::VSHL:
+ case X86ISD::VSHLI:
+ return IsVariable ? X86ISD::VSHL : X86ISD::VSHLI;
+ case ISD::SRL:
+ case X86ISD::VSRL:
+ case X86ISD::VSRLI:
+ return IsVariable ? X86ISD::VSRL : X86ISD::VSRLI;
+ case ISD::SRA:
+ case X86ISD::VSRA:
+ case X86ISD::VSRAI:
+ return IsVariable ? X86ISD::VSRA : X86ISD::VSRAI;
+ }
+ llvm_unreachable("Unknown target vector shift node");
+}
+
/// Handle vector element shifts where the shift amount is a constant.
/// Takes immediate version of shift as input.
static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
@@ -20236,46 +21303,57 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
CShAmt->getZExtValue(), DAG);
- // Change opcode to non-immediate version
- switch (Opc) {
- default: llvm_unreachable("Unknown target vector shift node");
- case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
- case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
- case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
- }
+ // Change opcode to non-immediate version.
+ Opc = getTargetVShiftUniformOpcode(Opc, true);
// Need to build a vector containing shift amount.
// SSE/AVX packed shifts only use the lower 64-bit of the shift count.
- // +=================+============+=======================================+
- // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
- // +=================+============+=======================================+
- // | i64 | Yes, No | Use ShAmt as lowest elt |
- // | i32 | Yes | zero-extend in-reg |
- // | (i32 zext(i16)) | Yes | zero-extend in-reg |
- // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) |
- // +=================+============+=======================================+
+ // +====================+============+=======================================+
+ // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
+ // +====================+============+=======================================+
+ // | i64 | Yes, No | Use ShAmt as lowest elt |
+ // | i32 | Yes | zero-extend in-reg |
+ // | (i32 zext(i16/i8)) | Yes | zero-extend in-reg |
+ // | (i32 zext(i16/i8)) | No | byte-shift-in-reg |
+  // | i16/i32            | No         | v4i32 build_vector(ShAmt, 0, ud, ud)  |
+ // +====================+============+=======================================+
if (SVT == MVT::i64)
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
- else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
- ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
+ else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+ ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
+ ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
ShAmt = ShAmt.getOperand(0);
- ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt);
- ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
+ MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
+ if (Subtarget.hasSSE41())
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
+ MVT::v2i64, ShAmt);
+ else {
+ SDValue ByteShift = DAG.getConstant(
+ (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
+ ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
+ ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
+ ByteShift);
+ ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
+ ByteShift);
+ }
} else if (Subtarget.hasSSE41() &&
ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
- ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
+ MVT::v2i64, ShAmt);
} else {
- SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT),
- DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
+ SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
+ DAG.getUNDEF(SVT)};
ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
- MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
+ MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());
ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
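When PMOVZX is unavailable, the VSHLDQ/VSRLDQ pair above zero-extends the low element by shifting the register up and back down. A rough scalar sketch of the same trick, with an illustrative name:

#include <cstdint>

// Logical shift up then down clears everything above the original element
// width, leaving the shift amount zero-extended in the low bits; the byte
// shifts play the same role on the 128-bit value.
// Assumes 0 < ElementBits <= 64.
uint64_t zextLowBits(uint64_t Reg, unsigned ElementBits) {
  unsigned Shift = 64 - ElementBits; // (128 - bits) / 8 bytes in vector form
  return (Reg << Shift) >> Shift;
}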
@@ -20292,11 +21370,7 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
if (X86::isZeroNode(Mask))
return DAG.getConstant(0, dl, MaskVT);
- if (MaskVT.bitsGT(Mask.getSimpleValueType())) {
- // Mask should be extended
- Mask = DAG.getNode(ISD::ANY_EXTEND, dl,
- MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask);
- }
+ assert(MaskVT.bitsLE(Mask.getSimpleValueType()) && "Unexpected mask size!");
if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
assert(MaskVT == MVT::v64i1 && "Expected v64i1 mask!");
@@ -20340,24 +21414,6 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- switch (Op.getOpcode()) {
- default: break;
- case X86ISD::CMPM:
- case X86ISD::CMPM_RND:
- case X86ISD::VPSHUFBITQMB:
- case X86ISD::VFPCLASS:
- return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
- case ISD::TRUNCATE:
- case X86ISD::VTRUNC:
- case X86ISD::VTRUNCS:
- case X86ISD::VTRUNCUS:
- case X86ISD::CVTPS2PH:
- // We can't use ISD::VSELECT here because it is not always "Legal"
- // for the destination type. For example vpmovqb require only AVX512
- // and vselect that can operate on byte element type require BWI
- OpcodeSelect = X86ISD::SELECT;
- break;
- }
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
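The masking node built here has straightforward per-lane select semantics; a sketch under that assumption, with illustrative names:

// Each result lane is either the freshly computed value or the preserved
// source; zero-masking forms pass a zero vector as the preserved source.
void applyLaneMask(const float *Op, const float *PreservedSrc,
                   const bool *Mask, float *Out, int NumLanes) {
  for (int i = 0; i < NumLanes; ++i)
    Out[i] = Mask[i] ? Op[i] : PreservedSrc[i];
}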
@@ -20383,7 +21439,9 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDLoc dl(Op);
   assert(Mask.getValueType() == MVT::i8 && "Unexpected type");
- SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask);
+ SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1,
+ DAG.getBitcast(MVT::v8i1, Mask),
+ DAG.getIntPtrConstant(0, dl));
if (Op.getOpcode() == X86ISD::FSETCCM ||
Op.getOpcode() == X86ISD::FSETCCM_RND ||
Op.getOpcode() == X86ISD::VFPCLASSS)
@@ -20486,13 +21544,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
}
- case INTR_TYPE_2OP:
- case INTR_TYPE_2OP_IMM8: {
+ case INTR_TYPE_2OP: {
SDValue Src2 = Op.getOperand(2);
- if (IntrData->Type == INTR_TYPE_2OP_IMM8)
- Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2);
-
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -20724,38 +21778,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Swap Src1 and Src2 in the node creation
return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
}
- case FMA_OP_MASKZ:
- case FMA_OP_MASK: {
- SDValue Src1 = Op.getOperand(1);
- SDValue Src2 = Op.getOperand(2);
- SDValue Src3 = Op.getOperand(3);
- SDValue Mask = Op.getOperand(4);
- MVT VT = Op.getSimpleValueType();
- SDValue PassThru = SDValue();
-
- // set PassThru element
- if (IntrData->Type == FMA_OP_MASKZ)
- PassThru = getZeroVector(VT, Subtarget, DAG, dl);
- else
- PassThru = Src1;
-
- // We specify 2 possible opcodes for intrinsics with rounding modes.
- // First, we check if the intrinsic may have non-default rounding mode,
- // (IntrData->Opc1 != 0), then we check the rounding mode operand.
- unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- SDValue Rnd = Op.getOperand(5);
- if (!isRoundModeCurDirection(Rnd))
- return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, Op.getValueType(),
- Src1, Src2, Src3, Rnd),
- Mask, PassThru, Subtarget, DAG);
- }
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
- dl, Op.getValueType(),
- Src1, Src2, Src3),
- Mask, PassThru, Subtarget, DAG);
- }
case IFMA_OP:
// NOTE: We need to swizzle the operands to pass the multiply operands
// first.
@@ -20766,7 +21788,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// does not change the value. Set it to 0 since it can change.
return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
DAG.getIntPtrConstant(0, dl));
- case CVTPD2PS_MASK: {
+ case CVTPD2PS_RND_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
@@ -20790,13 +21812,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getIntPtrConstant(0, dl)),
Mask, PassThru, Subtarget, DAG);
}
- case FPCLASS: {
- // FPclass intrinsics
- SDValue Src1 = Op.getOperand(1);
- MVT MaskVT = Op.getSimpleValueType();
- SDValue Imm = Op.getOperand(2);
- return DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm);
- }
case FPCLASSS: {
SDValue Src1 = Op.getOperand(1);
SDValue Imm = Op.getOperand(2);
@@ -20811,32 +21826,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
FPclassMask, DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(MVT::i8, Ins);
}
- case CMP_MASK: {
- // Comparison intrinsics with masks.
- // Example of transformation:
- // (i8 (int_x86_avx512_mask_pcmpeq_q_128
- // (v2i64 %a), (v2i64 %b), (i8 %mask))) ->
- // (i8 (bitcast
- // (v8i1 (insert_subvector zero,
- // (v2i1 (and (PCMPEQM %a, %b),
- // (extract_subvector
- // (v8i1 (bitcast %mask)), 0))), 0))))
- MVT VT = Op.getOperand(1).getSimpleValueType();
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
- SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3);
- MVT BitcastVT = MVT::getVectorVT(MVT::i1,
- Mask.getSimpleValueType().getSizeInBits());
- SDValue Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2));
- SDValue CmpMask = getVectorMaskingNode(Cmp, Mask, SDValue(),
- Subtarget, DAG);
- // Need to fill with zeros to ensure the bitcast will produce zeroes
- // for the upper bits in the v2i1/v4i1 case.
- SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
- DAG.getConstant(0, dl, BitcastVT),
- CmpMask, DAG.getIntPtrConstant(0, dl));
- return DAG.getBitcast(Op.getValueType(), Res);
- }
case CMP_MASK_CC: {
MVT MaskVT = Op.getSimpleValueType();
@@ -21007,6 +21996,59 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), RoundingMode);
}
+ // ADC/ADCX/SBB
+ case ADX: {
+ SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
+ SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32);
+
+ SDValue Res;
+ // If the carry in is zero, then we should just use ADD/SUB instead of
+ // ADC/SBB.
+ if (isNullConstant(Op.getOperand(1))) {
+ Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2),
+ Op.getOperand(3));
+ } else {
+ SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1),
+ DAG.getConstant(-1, dl, MVT::i8));
+ Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2),
+ Op.getOperand(3), GenCF.getValue(1));
+ }
+ SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
+ SDValue Results[] = { SetCC, Res };
+ return DAG.getMergeValues(Results, dl);
+ }
+ case CVTPD2PS_MASK:
+ case CVTPD2I_MASK:
+ case TRUNCATE_TO_REG: {
+ SDValue Src = Op.getOperand(1);
+ SDValue PassThru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+
+ if (isAllOnesConstant(Mask))
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
+
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
+ Mask);
+ }
+ case CVTPS2PH_MASK: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Rnd = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+
+ if (isAllOnesConstant(Mask))
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src, Rnd);
+
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd,
+ PassThru, Mask);
+
+ }
default:
break;
}
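The ADX case above models ADC/SBB with an explicit carry-in byte. A scalar sketch of that carry plumbing, with illustrative names:

#include <cstdint>

// A zero carry-in degenerates to a plain ADD; otherwise adding -1 to the
// carry-in byte regenerates CF (any non-zero byte carries out of 8 bits),
// and the ADC proper then folds that carry into the sum.
uint32_t adc32(uint8_t CarryIn, uint32_t A, uint32_t B, bool &CarryOut) {
  bool CF = ((unsigned)CarryIn + 0xFFu) > 0xFFu; // carries iff CarryIn != 0
  uint64_t Wide = (uint64_t)A + B + (CF ? 1u : 0u);
  CarryOut = (Wide >> 32) != 0;
  return (uint32_t)Wide;
}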
@@ -21018,6 +22060,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
+ case Intrinsic::x86_avx512_ktestc_b:
+ case Intrinsic::x86_avx512_ktestc_w:
+ case Intrinsic::x86_avx512_ktestc_d:
+ case Intrinsic::x86_avx512_ktestc_q:
+ case Intrinsic::x86_avx512_ktestz_b:
+ case Intrinsic::x86_avx512_ktestz_w:
+ case Intrinsic::x86_avx512_ktestz_d:
+ case Intrinsic::x86_avx512_ktestz_q:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc:
@@ -21036,15 +22086,30 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
- bool IsTestPacked = false;
+ unsigned TestOpc = X86ISD::PTEST;
X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx512_ktestc_b:
+ case Intrinsic::x86_avx512_ktestc_w:
+ case Intrinsic::x86_avx512_ktestc_d:
+ case Intrinsic::x86_avx512_ktestc_q:
+ // CF = 1
+ TestOpc = X86ISD::KTEST;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_avx512_ktestz_b:
+ case Intrinsic::x86_avx512_ktestz_w:
+ case Intrinsic::x86_avx512_ktestz_d:
+ case Intrinsic::x86_avx512_ktestz_q:
+ TestOpc = X86ISD::KTEST;
+ X86CC = X86::COND_E;
+ break;
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
@@ -21055,7 +22120,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
@@ -21066,7 +22131,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
@@ -21077,7 +22142,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
@@ -21196,14 +22260,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
}
- case Intrinsic::x86_seh_recoverfp: {
+ case Intrinsic::eh_recoverfp: {
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
- "llvm.x86.seh.recoverfp must take a function as the first argument");
+ "llvm.eh.recoverfp must take a function as the first argument");
return recoverFramePointer(DAG, Fn, IncomingFPOp);
}
@@ -21251,25 +22315,31 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
- MVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getSimpleValueType().getVectorNumElements());
+ unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
+ VT.getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
+
+ // We support two versions of the gather intrinsics. One with scalar mask and
+ // one with vXi1 mask. Convert scalar to vXi1 if necessary.
+ if (Mask.getValueType() != MaskVT)
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
- if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode()))
+ if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
- SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain};
+ SDValue Ops[] = {Src, Mask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
return DAG.getMergeValues(RetOps, dl);
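The per-lane behaviour assumed for the gather node can be sketched as follows; names are illustrative, and the real node also threads the chain and segment operands:

#include <cstdint>
#include <cstring>

// Active lanes load from Base + Index[i] * Scale; inactive lanes keep the
// source value, which is forced to zero above when the source is undef so
// that no false dependency on the previous register contents remains.
void gather32(const char *Base, const int64_t *Index, const bool *Mask,
              const int32_t *Src, int32_t *Out, int NumElts, int Scale) {
  for (int i = 0; i < NumElts; ++i) {
    if (Mask[i])
      std::memcpy(&Out[i], Base + Index[i] * Scale, sizeof(int32_t));
    else
      Out[i] = Src[i];
  }
}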
@@ -21287,12 +22357,17 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- MVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getSimpleValueType().getVectorNumElements());
+ unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
+ Src.getSimpleValueType().getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
+
+ // We support two versions of the scatter intrinsics. One with scalar mask and
+ // one with vXi1 mask. Convert scalar to vXi1 if necessary.
+ if (Mask.getValueType() != MaskVT)
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
- SDValue Ops[] = {Base, Scale, Index, Disp, Segment, VMask, Src, Chain};
+ SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
return SDValue(Res, 1);
}
@@ -21433,39 +22508,39 @@ static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
}
SDValue Chain = HI.getValue(1);
+ SDValue TSC;
+ if (Subtarget.is64Bit()) {
+ // The EDX register is loaded with the high-order 32 bits of the MSR, and
+ // the EAX register is loaded with the low-order 32 bits.
+ TSC = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+ DAG.getConstant(32, DL, MVT::i8));
+ TSC = DAG.getNode(ISD::OR, DL, MVT::i64, LO, TSC);
+ } else {
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ TSC = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, { LO, HI });
+ }
+
if (Opcode == X86ISD::RDTSCP_DAG) {
- assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->getNumOperands() == 2 && "Unexpected number of operands!");
// Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
// the ECX register. Add 'ecx' explicitly to the chain.
SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
HI.getValue(2));
- // Explicitly store the content of ECX at the location passed in input
- // to the 'rdtscp' intrinsic.
- Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
- MachinePointerInfo());
- }
- if (Subtarget.is64Bit()) {
- // The EDX register is loaded with the high-order 32 bits of the MSR, and
- // the EAX register is loaded with the low-order 32 bits.
- SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
- DAG.getConstant(32, DL, MVT::i8));
- Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
- Results.push_back(Chain);
+ Results.push_back(TSC);
+ Results.push_back(ecx);
+ Results.push_back(ecx.getValue(1));
return;
}
- // Use a buildpair to merge the two 32-bit values into a 64-bit one.
- SDValue Ops[] = { LO, HI };
- SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
- Results.push_back(Pair);
+ Results.push_back(TSC);
Results.push_back(Chain);
}
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- SmallVector<SDValue, 2> Results;
+ SmallVector<SDValue, 3> Results;
SDLoc DL(Op);
getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
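The shift/or above just reassembles the counter from its two 32-bit halves; the scalar equivalent, with an illustrative name:

#include <cstdint>

// EDX holds the high 32 bits and EAX the low 32 bits; on 32-bit targets the
// same value is produced with a BUILD_PAIR instead of the shift + or.
uint64_t combineTsc(uint32_t Lo, uint32_t Hi) {
  return ((uint64_t)Hi << 32) | Lo;
}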
@@ -21529,7 +22604,7 @@ EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
MachineMemOperand *MMO, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ SDValue Ops[] = { Chain, Val, Ptr, Mask };
return SignedSat ?
DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
@@ -21689,20 +22764,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
- // ADC/ADCX/SBB
- case ADX: {
- SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
- SDVTList VTs = DAG.getVTList(Op.getOperand(3).getValueType(), MVT::i32);
- SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
- DAG.getConstant(-1, dl, MVT::i8));
- SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
- Op.getOperand(4), GenCF.getValue(1));
- SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
- Op.getOperand(5), MachinePointerInfo());
- SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
- SDValue Results[] = { SetCC, Store };
- return DAG.getMergeValues(Results, dl);
- }
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
@@ -22255,11 +23316,10 @@ static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
// we just take the hi result (by masking the lo result to zero before the
// add).
SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
- SDValue Zero = getZeroVector(CurrVT, Subtarget, DAG, DL);
+ SDValue Zero = DAG.getConstant(0, DL, CurrVT);
- SDValue NibbleMask = DAG.getConstant(0xF, DL, CurrVT);
SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
- SDValue Lo = DAG.getNode(ISD::AND, DL, CurrVT, Op0, NibbleMask);
+ SDValue Lo = Op0;
SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift);
SDValue HiZ;
if (CurrVT.is512BitVector()) {
@@ -22377,38 +23437,23 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}
-static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
unsigned NumBits = VT.getScalarSizeInBits();
+ SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
- if (VT.isVector()) {
- SDValue N0 = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, dl, VT);
-
- // lsb(x) = (x & -x)
- SDValue LSB = DAG.getNode(ISD::AND, dl, VT, N0,
- DAG.getNode(ISD::SUB, dl, VT, Zero, N0));
-
- // cttz_undef(x) = (width - 1) - ctlz(lsb)
- if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
- SDValue WidthMinusOne = DAG.getConstant(NumBits - 1, dl, VT);
- return DAG.getNode(ISD::SUB, dl, VT, WidthMinusOne,
- DAG.getNode(ISD::CTLZ, dl, VT, LSB));
- }
-
- // cttz(x) = ctpop(lsb - 1)
- SDValue One = DAG.getConstant(1, dl, VT);
- return DAG.getNode(ISD::CTPOP, dl, VT,
- DAG.getNode(ISD::SUB, dl, VT, LSB, One));
- }
+ // Decompose 256-bit ops into smaller 128-bit ops.
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
+ return Lower256IntUnary(Op, DAG);
- assert(Op.getOpcode() == ISD::CTTZ &&
+ assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ &&
"Only scalar CTTZ requires custom lowering");
// Issue a bsf (scan bits forward) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
- Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op.getOperand(0));
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0);
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
@@ -22422,7 +23467,7 @@ static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
/// Break a 256-bit integer operation into two new 128-bit ones and then
/// concatenate the result back.
-static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
+static SDValue split256IntArith(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && VT.isInteger() &&
@@ -22451,7 +23496,7 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
/// Break a 512-bit integer operation into two new 256-bit ones and then
/// concatenate the result back.
-static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
+static SDValue split512IntArith(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is512BitVector() && VT.isInteger() &&
@@ -22478,18 +23523,46 @@ static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
-static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
+static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
+ if (VT == MVT::i16 || VT == MVT::i32)
+ return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
+
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));
+
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
}
-static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getScalarType() == MVT::i1) {
+ SDLoc dl(Op);
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Expected saturated arithmetic opcode");
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
+ DAG.getNOT(dl, Op.getOperand(1), VT));
+ }
+ }
+
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return split256IntArith(Op, DAG);
+}
+
+static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
// Since X86 does not have CMOV for 8-bit integer, we don't convert
@@ -22503,10 +23576,23 @@ static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::CMOV, DL, VT, Ops);
}
- assert(Op.getSimpleValueType().is256BitVector() &&
- Op.getSimpleValueType().isInteger() &&
- "Only handle AVX 256-bit vector integer operation");
- return Lower256IntUnary(Op, DAG);
+ // ABS(vXi64 X) --> VPBLENDVPD(X, 0-X, X).
+ if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) {
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
+ return DAG.getNode(X86ISD::BLENDV, DL, VT, Src, Sub, Src);
+ }
+
+ if (VT.is256BitVector() && !Subtarget.hasInt256()) {
+ assert(VT.isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntUnary(Op, DAG);
+ }
+
+ // Default to expand.
+ return SDValue();
}
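The VPBLENDVPD form of ABS selects per lane on the sign bit of the input itself. One lane of that as a hedged scalar sketch; like the vector op, INT64_MIN simply wraps to itself:

#include <cstdint>

// BLENDV picks between X and 0 - X using the sign bit of X as the selector,
// so negative lanes take the negation and non-negative lanes pass through.
// The negation is done in unsigned arithmetic to keep the wrap well-defined.
int64_t absViaBlend(int64_t X) {
  int64_t Neg = (int64_t)(0ull - (uint64_t)X);
  return (X < 0) ? Neg : X;
}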
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
@@ -22514,7 +23600,7 @@ static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
// For AVX1 cases, split to use legal ops (everything but v4i64).
if (VT.getScalarType() != MVT::i64 && VT.is256BitVector())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
@@ -22556,9 +23642,9 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
- // Decompose 256-bit ops into smaller 128-bit ops.
+ // Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
@@ -22566,53 +23652,49 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16
// vector pairs, multiply and truncate.
if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
- if (Subtarget.hasInt256()) {
- // For 512-bit vectors, split into 256-bit vectors to allow the
- // sign-extension to occur.
- if (VT == MVT::v64i8)
- return Lower512IntArith(Op, DAG);
-
- // For 256-bit vectors, split into 128-bit vectors to allow the
- // sign-extension to occur. We don't need this on AVX512BW as we can
- // safely sign-extend to v32i16.
- if (VT == MVT::v32i8 && !Subtarget.hasBWI())
- return Lower256IntArith(Op, DAG);
+ unsigned NumElts = VT.getVectorNumElements();
+ if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
+ (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
return DAG.getNode(
ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::MUL, dl, ExVT,
- DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, A),
- DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, B)));
+ DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, A),
+ DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, B)));
}
- assert(VT == MVT::v16i8 &&
- "Pre-AVX2 support only supports v16i8 multiplication");
- MVT ExVT = MVT::v8i16;
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- // Extract the lo parts and sign extend to i16
+    // Extract the lo/hi parts and any-extend to i16.
// We're going to mask off the low byte of each result element of the
// pmullw, so it doesn't matter what's in the high byte of each 16-bit
// element.
- const int LoShufMask[] = {0, -1, 1, -1, 2, -1, 3, -1,
- 4, -1, 5, -1, 6, -1, 7, -1};
- SDValue ALo = DAG.getVectorShuffle(VT, dl, A, A, LoShufMask);
- SDValue BLo = DAG.getVectorShuffle(VT, dl, B, B, LoShufMask);
- ALo = DAG.getBitcast(ExVT, ALo);
- BLo = DAG.getBitcast(ExVT, BLo);
-
- // Extract the hi parts and sign extend to i16
- // We're going to mask off the low byte of each result element of the
- // pmullw, so it doesn't matter what's in the high byte of each 16-bit
- // element.
- const int HiShufMask[] = {8, -1, 9, -1, 10, -1, 11, -1,
- 12, -1, 13, -1, 14, -1, 15, -1};
- SDValue AHi = DAG.getVectorShuffle(VT, dl, A, A, HiShufMask);
- SDValue BHi = DAG.getVectorShuffle(VT, dl, B, B, HiShufMask);
- AHi = DAG.getBitcast(ExVT, AHi);
- BHi = DAG.getBitcast(ExVT, BHi);
-
- // Multiply, mask the lower 8bits of the lo/hi results and pack
+ SDValue Undef = DAG.getUNDEF(VT);
+ SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));
+ SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));
+
+ SDValue BLo, BHi;
+ if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
+      // If the RHS is a constant, manually unpackl/unpackh.
+ SmallVector<SDValue, 16> LoOps, HiOps;
+ for (unsigned i = 0; i != NumElts; i += 16) {
+ for (unsigned j = 0; j != 8; ++j) {
+ LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl,
+ MVT::i16));
+ HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl,
+ MVT::i16));
+ }
+ }
+
+ BLo = DAG.getBuildVector(ExVT, dl, LoOps);
+ BHi = DAG.getBuildVector(ExVT, dl, HiOps);
+ } else {
+ BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
+ BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
+ }
+
+    // Multiply, mask the lower 8 bits of the lo/hi results and pack.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT));
@@ -22661,9 +23743,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
//
// Hi = psllqi(AloBhi + AhiBlo, 32);
// return AloBlo + Hi;
- KnownBits AKnown, BKnown;
- DAG.computeKnownBits(A, AKnown);
- DAG.computeKnownBits(B, BKnown);
+ KnownBits AKnown = DAG.computeKnownBits(A);
+ KnownBits BKnown = DAG.computeKnownBits(B);
APInt LowerBitsMask = APInt::getLowBitsSet(64, 32);
bool ALoIsZero = LowerBitsMask.isSubsetOf(AKnown.Zero);
@@ -22673,7 +23754,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
bool AHiIsZero = UpperBitsMask.isSubsetOf(AKnown.Zero);
bool BHiIsZero = UpperBitsMask.isSubsetOf(BKnown.Zero);
- SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
// Only multiply lo/hi halves that aren't known to be zero.
SDValue AloBlo = Zero;
@@ -22702,10 +23783,79 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ bool IsSigned = Op->getOpcode() == ISD::MULHS;
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
- // Decompose 256-bit ops into smaller 128-bit ops.
+ // Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
+
+ if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
+ assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
+ (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
+ (VT == MVT::v16i32 && Subtarget.hasAVX512()));
+
+ // PMULxD operations multiply each even value (starting at 0) of LHS with
+    // the related value of RHS and produce a widened result.
+ // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
+ // => <2 x i64> <ae|cg>
+ //
+    // In other words, to have all the results, we need to perform two PMULxD:
+    // 1. one with the even values.
+    // 2. one with the odd values.
+    // To achieve #2, we need to place the odd values at an even position.
+ //
+ // Place the odd value at an even position (basically, shift all values 1
+ // step to the left):
+ const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
+ 9, -1, 11, -1, 13, -1, 15, -1};
+ // <a|b|c|d> => <b|undef|d|undef>
+ SDValue Odd0 = DAG.getVectorShuffle(VT, dl, A, A,
+ makeArrayRef(&Mask[0], NumElts));
+ // <e|f|g|h> => <f|undef|h|undef>
+ SDValue Odd1 = DAG.getVectorShuffle(VT, dl, B, B,
+ makeArrayRef(&Mask[0], NumElts));
+
+ // Emit two multiplies, one for the lower 2 ints and one for the higher 2
+ // ints.
+ MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
+ unsigned Opcode =
+ (IsSigned && Subtarget.hasSSE41()) ? X86ISD::PMULDQ : X86ISD::PMULUDQ;
+ // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
+ // => <2 x i64> <ae|cg>
+ SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
+ DAG.getBitcast(MulVT, A),
+ DAG.getBitcast(MulVT, B)));
+ // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
+ // => <2 x i64> <bf|dh>
+ SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
+ DAG.getBitcast(MulVT, Odd0),
+ DAG.getBitcast(MulVT, Odd1)));
+
+ // Shuffle it back into the right order.
+ SmallVector<int, 16> ShufMask(NumElts);
+ for (int i = 0; i != (int)NumElts; ++i)
+ ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
+
+ SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);
+
+    // If we have a signed multiply but no PMULDQ, fix up the result of an
+    // unsigned multiply.
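+    // The high half of an unsigned product differs from the signed one by
+    // (A < 0 ? B : 0) + (B < 0 ? A : 0) (mod 2^32), since reinterpreting a
+    // negative operand as unsigned adds 2^32 to it, so subtract that fixup.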
+ if (IsSigned && !Subtarget.hasSSE41()) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B);
+ SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A);
+
+ SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);
+ }
+
+ return Res;
+ }
// Only i8 vectors should need custom lowering after this.
assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
@@ -22714,123 +23864,141 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
// Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
// logical shift down the upper half and pack back to i8.
- SDValue A = Op.getOperand(0);
- SDValue B = Op.getOperand(1);
// With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack
// and then ashr/lshr the upper bits down to the lower bits before multiply.
- unsigned Opcode = Op.getOpcode();
- unsigned ExShift = (ISD::MULHU == Opcode ? ISD::SRL : ISD::SRA);
- unsigned ExAVX = (ISD::MULHU == Opcode ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
+ unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+ if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
+ (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
+ SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
+ SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);
+ Mul = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
+ }
- // For 512-bit vectors, split into 256-bit vectors to allow the
+ // For signed 512-bit vectors, split into 256-bit vectors to allow the
// sign-extension to occur.
- if (VT == MVT::v64i8)
- return Lower512IntArith(Op, DAG);
+ if (VT == MVT::v64i8 && IsSigned)
+ return split512IntArith(Op, DAG);
- // AVX2 implementations - extend xmm subvectors to ymm.
- if (Subtarget.hasInt256()) {
- unsigned NumElems = VT.getVectorNumElements();
+ // Signed AVX2 implementation - extend xmm subvectors to ymm.
+ if (VT == MVT::v32i8 && IsSigned) {
SDValue Lo = DAG.getIntPtrConstant(0, dl);
- SDValue Hi = DAG.getIntPtrConstant(NumElems / 2, dl);
-
- if (VT == MVT::v32i8) {
- if (Subtarget.canExtendTo512BW()) {
- SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v32i16, A);
- SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v32i16, B);
- SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v32i16, ExA, ExB);
- Mul = DAG.getNode(ISD::SRL, dl, MVT::v32i16, Mul,
- DAG.getConstant(8, dl, MVT::v32i16));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
- }
- SDValue ALo = extract128BitVector(A, 0, DAG, dl);
- SDValue BLo = extract128BitVector(B, 0, DAG, dl);
- SDValue AHi = extract128BitVector(A, NumElems / 2, DAG, dl);
- SDValue BHi = extract128BitVector(B, NumElems / 2, DAG, dl);
- ALo = DAG.getNode(ExAVX, dl, MVT::v16i16, ALo);
- BLo = DAG.getNode(ExAVX, dl, MVT::v16i16, BLo);
- AHi = DAG.getNode(ExAVX, dl, MVT::v16i16, AHi);
- BHi = DAG.getNode(ExAVX, dl, MVT::v16i16, BHi);
- Lo = DAG.getNode(ISD::SRL, dl, MVT::v16i16,
- DAG.getNode(ISD::MUL, dl, MVT::v16i16, ALo, BLo),
- DAG.getConstant(8, dl, MVT::v16i16));
- Hi = DAG.getNode(ISD::SRL, dl, MVT::v16i16,
- DAG.getNode(ISD::MUL, dl, MVT::v16i16, AHi, BHi),
- DAG.getConstant(8, dl, MVT::v16i16));
- // The ymm variant of PACKUS treats the 128-bit lanes separately, so before
- // using PACKUS we need to permute the inputs to the correct lo/hi xmm lane.
- const int LoMask[] = {0, 1, 2, 3, 4, 5, 6, 7,
- 16, 17, 18, 19, 20, 21, 22, 23};
- const int HiMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
- 24, 25, 26, 27, 28, 29, 30, 31};
- return DAG.getNode(X86ISD::PACKUS, dl, VT,
- DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, LoMask),
- DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask));
- }
-
- assert(VT == MVT::v16i8 && "Unexpected VT");
-
- SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v16i16, A);
- SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v16i16, B);
- SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB);
- Mul = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul,
- DAG.getConstant(8, dl, MVT::v16i16));
- // If we have BWI we can use truncate instruction.
- if (Subtarget.hasBWI())
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Lo);
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Hi);
- return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
- }
-
- assert(VT == MVT::v16i8 &&
- "Pre-AVX2 support only supports v16i8 multiplication");
- MVT ExVT = MVT::v8i16;
- unsigned ExSSE41 = ISD::MULHU == Opcode ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG;
+ SDValue Hi = DAG.getIntPtrConstant(NumElts / 2, dl);
+
+ MVT ExVT = MVT::v16i16;
+ SDValue ALo = extract128BitVector(A, 0, DAG, dl);
+ SDValue BLo = extract128BitVector(B, 0, DAG, dl);
+ SDValue AHi = extract128BitVector(A, NumElts / 2, DAG, dl);
+ SDValue BHi = extract128BitVector(B, NumElts / 2, DAG, dl);
+ ALo = DAG.getNode(ExAVX, dl, ExVT, ALo);
+ BLo = DAG.getNode(ExAVX, dl, ExVT, BLo);
+ AHi = DAG.getNode(ExAVX, dl, ExVT, AHi);
+ BHi = DAG.getNode(ExAVX, dl, ExVT, BHi);
+ Lo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
+ Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
+ Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Lo, 8, DAG);
+ Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Hi, 8, DAG);
+
+ // Bitcast back to VT and then pack all the even elements from Lo and Hi.
+ // Shuffle lowering should turn this into PACKUS+PERMQ
+ Lo = DAG.getBitcast(VT, Lo);
+ Hi = DAG.getBitcast(VT, Hi);
+ return DAG.getVectorShuffle(VT, dl, Lo, Hi,
+ { 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30,
+ 32, 34, 36, 38, 40, 42, 44, 46,
+ 48, 50, 52, 54, 56, 58, 60, 62});
+ }
+
+ // For signed v16i8 and all unsigned vXi8 we will unpack the low and high
+  // half of each 128-bit lane to widen to a vXi16 type. Do the multiplies,
+ // shift the results and pack the half lane results back together.
+
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+
+ static const int PSHUFDMask[] = { 8, 9, 10, 11, 12, 13, 14, 15,
+ -1, -1, -1, -1, -1, -1, -1, -1};
// Extract the lo parts and zero/sign extend to i16.
- SDValue ALo, BLo;
- if (Subtarget.hasSSE41()) {
- ALo = DAG.getNode(ExSSE41, dl, ExVT, A);
- BLo = DAG.getNode(ExSSE41, dl, ExVT, B);
+ // Only use SSE4.1 instructions for signed v16i8 where using unpack requires
+ // shifts to sign extend. Using unpack for unsigned only requires an xor to
+  // create zeros and a copy due to tied register constraints pre-AVX. But using
+ // zero_extend_vector_inreg would require an additional pshufd for the high
+ // part.
+
+ SDValue ALo, AHi;
+ if (IsSigned && VT == MVT::v16i8 && Subtarget.hasSSE41()) {
+ ALo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, A);
+
+ AHi = DAG.getVectorShuffle(VT, dl, A, A, PSHUFDMask);
+ AHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, AHi);
+ } else if (IsSigned) {
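+    // Unpack A into the high byte of each i16 lane (low byte undef); the
+    // arithmetic shift right by 8 below then sign extends each byte to i16.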
+ ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), A));
+ AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), A));
+
+ ALo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, ALo, 8, DAG);
+ AHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, AHi, 8, DAG);
} else {
- const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
- -1, 4, -1, 5, -1, 6, -1, 7};
- ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
- BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- ALo = DAG.getBitcast(ExVT, ALo);
- BLo = DAG.getBitcast(ExVT, BLo);
- ALo = DAG.getNode(ExShift, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT));
- BLo = DAG.getNode(ExShift, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT));
- }
-
- // Extract the hi parts and zero/sign extend to i16.
- SDValue AHi, BHi;
- if (Subtarget.hasSSE41()) {
- const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
- -1, -1, -1, -1, -1, -1, -1, -1};
- AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
- BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getNode(ExSSE41, dl, ExVT, AHi);
- BHi = DAG.getNode(ExSSE41, dl, ExVT, BHi);
+ ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A,
+ DAG.getConstant(0, dl, VT)));
+ AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A,
+ DAG.getConstant(0, dl, VT)));
+ }
+
+ SDValue BLo, BHi;
+ if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
+    // If the RHS is a constant, manually unpackl/unpackh and extend.
+ SmallVector<SDValue, 16> LoOps, HiOps;
+ for (unsigned i = 0; i != NumElts; i += 16) {
+ for (unsigned j = 0; j != 8; ++j) {
+ SDValue LoOp = B.getOperand(i + j);
+ SDValue HiOp = B.getOperand(i + j + 8);
+
+ if (IsSigned) {
+ LoOp = DAG.getSExtOrTrunc(LoOp, dl, MVT::i16);
+ HiOp = DAG.getSExtOrTrunc(HiOp, dl, MVT::i16);
+ } else {
+ LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16);
+ HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16);
+ }
+
+ LoOps.push_back(LoOp);
+ HiOps.push_back(HiOp);
+ }
+ }
+
+ BLo = DAG.getBuildVector(ExVT, dl, LoOps);
+ BHi = DAG.getBuildVector(ExVT, dl, HiOps);
+ } else if (IsSigned && VT == MVT::v16i8 && Subtarget.hasSSE41()) {
+ BLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, B);
+
+ BHi = DAG.getVectorShuffle(VT, dl, B, B, PSHUFDMask);
+ BHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, BHi);
+ } else if (IsSigned) {
+ BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), B));
+ BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), B));
+
+ BLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BLo, 8, DAG);
+ BHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BHi, 8, DAG);
} else {
- const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
- -1, 12, -1, 13, -1, 14, -1, 15};
- AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
- BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getBitcast(ExVT, AHi);
- BHi = DAG.getBitcast(ExVT, BHi);
- AHi = DAG.getNode(ExShift, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT));
- BHi = DAG.getNode(ExShift, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT));
+ BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B,
+ DAG.getConstant(0, dl, VT)));
+ BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B,
+ DAG.getConstant(0, dl, VT)));
}
// Multiply, lshr the upper 8bits to the lower 8bits of the lo/hi results and
- // pack back to v16i8.
+ // pack back to vXi8.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
- RLo = DAG.getNode(ISD::SRL, dl, ExVT, RLo, DAG.getConstant(8, dl, ExVT));
- RHi = DAG.getNode(ISD::SRL, dl, ExVT, RHi, DAG.getConstant(8, dl, ExVT));
+ RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RLo, 8, DAG);
+ RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG);
+
+ // Bitcast back to VT and then pack all the even elements from Lo and Hi.
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
@@ -22890,105 +24058,6 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
return DAG.getBitcast(VT, CallInfo.first);
}
-static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
- MVT VT = Op0.getSimpleValueType();
- SDLoc dl(Op);
-
- // Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector() && !Subtarget.hasInt256()) {
- unsigned Opcode = Op.getOpcode();
- unsigned NumElems = VT.getVectorNumElements();
- MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), NumElems / 2);
- SDValue Lo0 = extract128BitVector(Op0, 0, DAG, dl);
- SDValue Lo1 = extract128BitVector(Op1, 0, DAG, dl);
- SDValue Hi0 = extract128BitVector(Op0, NumElems / 2, DAG, dl);
- SDValue Hi1 = extract128BitVector(Op1, NumElems / 2, DAG, dl);
- SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Lo0, Lo1);
- SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Hi0, Hi1);
- SDValue Ops[] = {
- DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(0), Hi.getValue(0)),
- DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(1), Hi.getValue(1))
- };
- return DAG.getMergeValues(Ops, dl);
- }
-
- assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
- (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
- (VT == MVT::v16i32 && Subtarget.hasAVX512()));
-
- int NumElts = VT.getVectorNumElements();
-
- // PMULxD operations multiply each even value (starting at 0) of LHS with
- // the related value of RHS and produce a widen result.
- // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
- // => <2 x i64> <ae|cg>
- //
- // In other word, to have all the results, we need to perform two PMULxD:
- // 1. one with the even values.
- // 2. one with the odd values.
- // To achieve #2, with need to place the odd values at an even position.
- //
- // Place the odd value at an even position (basically, shift all values 1
- // step to the left):
- const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1};
- // <a|b|c|d> => <b|undef|d|undef>
- SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0,
- makeArrayRef(&Mask[0], NumElts));
- // <e|f|g|h> => <f|undef|h|undef>
- SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1,
- makeArrayRef(&Mask[0], NumElts));
-
- // Emit two multiplies, one for the lower 2 ints and one for the higher 2
- // ints.
- MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
- bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
- unsigned Opcode =
- (!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
- // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
- // => <2 x i64> <ae|cg>
- SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
- DAG.getBitcast(MulVT, Op0),
- DAG.getBitcast(MulVT, Op1)));
- // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
- // => <2 x i64> <bf|dh>
- SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
- DAG.getBitcast(MulVT, Odd0),
- DAG.getBitcast(MulVT, Odd1)));
-
- // Shuffle it back into the right order.
- SmallVector<int, 16> HighMask(NumElts);
- SmallVector<int, 16> LowMask(NumElts);
- for (int i = 0; i != NumElts; ++i) {
- HighMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
- LowMask[i] = (i / 2) * 2 + ((i % 2) * NumElts);
- }
-
- SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
-
- // If we have a signed multiply but no PMULDQ fix up the high parts of a
- // unsigned multiply.
- if (IsSigned && !Subtarget.hasSSE41()) {
- SDValue ShAmt = DAG.getConstant(
- 31, dl,
- DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
- SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
- SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
-
- SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
- Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
- }
-
- // The first result of MUL_LOHI is actually the low value, followed by the
- // high value.
- SDValue Ops[] = {Lows, Highs};
- return DAG.getMergeValues(Ops, dl);
-}
-
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
@@ -23042,9 +24111,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
-
- unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
- (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
+ unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false);
auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
@@ -23055,8 +24122,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
"Unsupported PCMPGT op");
- return DAG.getNode(X86ISD::PCMPGT, dl, VT,
- getZeroVector(VT, Subtarget, DAG, dl), R);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R);
}
if (ShiftAmt >= 32) {
@@ -23071,7 +24137,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
{9, 1, 11, 3, 13, 5, 15, 7});
} else {
- // SRA upper i32, SHL whole i64 and select lower i32.
+ // SRA upper i32, SRL whole i64 and select lower i32.
SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
ShiftAmt, DAG);
SDValue Lower =
@@ -23087,199 +24153,123 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
};
// Optimize shl/srl/sra with constant shift amount.
- if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
- if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
- uint64_t ShiftAmt = ShiftConst->getZExtValue();
-
- if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
- return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
-
- // i64 SRA needs to be performed as partial shifts.
- if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
- (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
- Op.getOpcode() == ISD::SRA)
- return ArithmeticShiftRight64(ShiftAmt);
-
- if (VT == MVT::v16i8 ||
- (Subtarget.hasInt256() && VT == MVT::v32i8) ||
- VT == MVT::v64i8) {
- unsigned NumElts = VT.getVectorNumElements();
- MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
-
- // Simple i8 add case
- if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
- return DAG.getNode(ISD::ADD, dl, VT, R, R);
-
- // ashr(R, 7) === cmp_slt(R, 0)
- if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
- SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
- if (VT.is512BitVector()) {
- assert(VT == MVT::v64i8 && "Unexpected element type!");
- SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R,
- ISD::SETGT);
- return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
- }
- return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
- }
+ APInt APIntShiftAmt;
+ if (!isConstantSplat(Amt, APIntShiftAmt))
+ return SDValue();
+ uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
- // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
- if (VT == MVT::v16i8 && Subtarget.hasXOP())
- return SDValue();
+ if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
+ return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
- if (Op.getOpcode() == ISD::SHL) {
- // Make a large shift.
- SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
- R, ShiftAmt, DAG);
- SHL = DAG.getBitcast(VT, SHL);
- // Zero out the rightmost bits.
- return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
- }
- if (Op.getOpcode() == ISD::SRL) {
- // Make a large shift.
- SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT,
- R, ShiftAmt, DAG);
- SRL = DAG.getBitcast(VT, SRL);
- // Zero out the leftmost bits.
- return DAG.getNode(ISD::AND, dl, VT, SRL,
- DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
- }
- if (Op.getOpcode() == ISD::SRA) {
- // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
- SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
-
- SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
- Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
- Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
- return Res;
- }
- llvm_unreachable("Unknown shift opcode.");
- }
- }
- }
+ // i64 SRA needs to be performed as partial shifts.
+ if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
+ (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
+ Op.getOpcode() == ISD::SRA)
+ return ArithmeticShiftRight64(ShiftAmt);
- // Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
- // TODO: Replace constant extraction with getTargetConstantBitsFromNode.
- if (!Subtarget.hasXOP() &&
- (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
- (Subtarget.hasAVX512() && VT == MVT::v8i64))) {
+ if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
+ VT == MVT::v64i8) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- // AVX1 targets maybe extracting a 128-bit vector from a 256-bit constant.
- unsigned SubVectorScale = 1;
- if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- SubVectorScale =
- Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits();
- Amt = Amt.getOperand(0);
- }
+ // Simple i8 add case
+ if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
+ return DAG.getNode(ISD::ADD, dl, VT, R, R);
- // Peek through any splat that was introduced for i64 shift vectorization.
- int SplatIndex = -1;
- if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
- if (SVN->isSplat()) {
- SplatIndex = SVN->getSplatIndex();
- Amt = Amt.getOperand(0);
- assert(SplatIndex < (int)VT.getVectorNumElements() &&
- "Splat shuffle referencing second operand");
+ // ashr(R, 7) === cmp_slt(R, 0)
+ if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
+ SDValue Zeros = DAG.getConstant(0, dl, VT);
+ if (VT.is512BitVector()) {
+ assert(VT == MVT::v64i8 && "Unexpected element type!");
+ SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
}
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
- if (Amt.getOpcode() != ISD::BITCAST ||
- Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
+ // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
+ if (VT == MVT::v16i8 && Subtarget.hasXOP())
return SDValue();
- Amt = Amt.getOperand(0);
- unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
- (SubVectorScale * VT.getVectorNumElements());
- unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
- uint64_t ShiftAmt = 0;
- unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
- for (unsigned i = 0; i != Ratio; ++i) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
- if (!C)
- return SDValue();
- // 6 == Log2(64)
- ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
- }
-
- // Check remaining shift amounts (if not a splat).
- if (SplatIndex < 0) {
- for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
- uint64_t ShAmt = 0;
- for (unsigned j = 0; j != Ratio; ++j) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
- if (!C)
- return SDValue();
- // 6 == Log2(64)
- ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
- }
- if (ShAmt != ShiftAmt)
- return SDValue();
- }
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R,
+ ShiftAmt, DAG);
+ SHL = DAG.getBitcast(VT, SHL);
+ // Zero out the rightmost bits.
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R,
+ ShiftAmt, DAG);
+ SRL = DAG.getBitcast(VT, SRL);
+ // Zero out the leftmost bits.
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
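+      // Mask = 128 >> Amt has a single bit set where the sign bit lands after
+      // the logical shift, and (x ^ Mask) - Mask sign extends from that bit.
+      // E.g. for Amt = 3: lshr(0xF8, 3) = 0x1F, Mask = 0x10 and
+      // (0x1F ^ 0x10) - 0x10 = 0xFF = ashr(0xF8, 3).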
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+
+ SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
}
-
- if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
- return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
-
- if (Op.getOpcode() == ISD::SRA)
- return ArithmeticShiftRight64(ShiftAmt);
+ llvm_unreachable("Unknown shift opcode.");
}
return SDValue();
}
-// Determine if V is a splat value, and return the scalar.
-static SDValue IsSplatValue(MVT VT, SDValue V, const SDLoc &dl,
- SelectionDAG &DAG, const X86Subtarget &Subtarget,
- unsigned Opcode) {
- V = peekThroughEXTRACT_SUBVECTORs(V);
-
- // Check if this is a splat build_vector node.
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V)) {
- SDValue SplatAmt = BV->getSplatValue();
- if (SplatAmt && SplatAmt.isUndef())
- return SDValue();
- return SplatAmt;
- }
-
- // Check for SUB(SPLAT_BV, SPLAT) cases from rotate patterns.
- if (V.getOpcode() == ISD::SUB &&
- !SupportedVectorVarShift(VT, Subtarget, Opcode)) {
- SDValue LHS = peekThroughEXTRACT_SUBVECTORs(V.getOperand(0));
- SDValue RHS = peekThroughEXTRACT_SUBVECTORs(V.getOperand(1));
+// If V is a splat value, return the source vector and splat index.
+static SDValue IsSplatVector(SDValue V, int &SplatIdx, SelectionDAG &DAG) {
+ V = peekThroughEXTRACT_SUBVECTORs(V);
- // Ensure that the corresponding splat BV element is not UNDEF.
- BitVector UndefElts;
- BuildVectorSDNode *BV0 = dyn_cast<BuildVectorSDNode>(LHS);
- ShuffleVectorSDNode *SVN1 = dyn_cast<ShuffleVectorSDNode>(RHS);
- if (BV0 && SVN1 && BV0->getSplatValue(&UndefElts) && SVN1->isSplat()) {
- unsigned SplatIdx = (unsigned)SVN1->getSplatIndex();
- if (!UndefElts[SplatIdx])
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getVectorElementType(), V,
- DAG.getIntPtrConstant(SplatIdx, dl));
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+ switch (Opcode) {
+ default: {
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (DAG.isSplatValue(V, DemandedElts, UndefElts)) {
+ // Handle case where all demanded elements are UNDEF.
+ if (DemandedElts.isSubsetOf(UndefElts)) {
+ SplatIdx = 0;
+ return DAG.getUNDEF(VT);
+ }
+ SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+ return V;
}
+ break;
}
-
- // Check if this is a shuffle node doing a splat.
- ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V);
- if (!SVN || !SVN->isSplat())
- return SDValue();
-
- unsigned SplatIdx = (unsigned)SVN->getSplatIndex();
- SDValue InVec = V.getOperand(0);
- if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- assert((SplatIdx < VT.getVectorNumElements()) &&
- "Unexpected shuffle index found!");
- return InVec.getOperand(SplatIdx);
- } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2)))
- if (C->getZExtValue() == SplatIdx)
- return InVec.getOperand(1);
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat.
+ // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+ // getTargetVShiftNode currently struggles without the splat source.
+ auto *SVN = cast<ShuffleVectorSDNode>(V);
+ if (!SVN->isSplat())
+ break;
+ int Idx = SVN->getSplatIndex();
+ int NumElts = V.getValueType().getVectorNumElements();
+ SplatIdx = Idx % NumElts;
+ return V.getOperand(Idx / NumElts);
}
+ }
+
+ return SDValue();
+}
- // Avoid introducing an extract element from a shuffle.
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getVectorElementType(), InVec,
- DAG.getIntPtrConstant(SplatIdx, dl));
+static SDValue GetSplatValue(SDValue V, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ int SplatIdx;
+ if (SDValue SrcVector = IsSplatVector(V, SplatIdx, DAG))
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ SrcVector.getValueType().getScalarType(), SrcVector,
+ DAG.getIntPtrConstant(SplatIdx, dl));
+ return SDValue();
}
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
@@ -23289,17 +24279,11 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
+ unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
+ unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
- unsigned X86OpcI = (Opcode == ISD::SHL) ? X86ISD::VSHLI :
- (Opcode == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
-
- unsigned X86OpcV = (Opcode == ISD::SHL) ? X86ISD::VSHL :
- (Opcode == ISD::SRL) ? X86ISD::VSRL : X86ISD::VSRA;
-
- Amt = peekThroughEXTRACT_SUBVECTORs(Amt);
-
- if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
- if (SDValue BaseShAmt = IsSplatValue(VT, Amt, dl, DAG, Subtarget, Opcode)) {
+ if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) {
+ if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
MVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
@@ -23309,6 +24293,50 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
+
+ // vXi8 shifts - shift as v8i16 + mask result.
+ if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
+ (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
+ VT == MVT::v64i8) &&
+ !Subtarget.hasXOP()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
+ unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
+ unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+ // Create the mask using vXi16 shifts. For shift-rights we need to move
+ // the upper byte down before splatting the vXi8 mask.
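+      // E.g. for a right shift by 3: 0xFFFF >> 3 = 0x1FFF, moving the upper
+      // byte down gives 0x001F, and splatting it makes every byte 0x1F, which
+      // is exactly the set of bits a per-byte right shift by 3 can produce.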
+ SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
+ BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
+ BaseShAmt, Subtarget, DAG);
+ if (Opcode != ISD::SHL)
+ BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
+ 8, DAG);
+ BitMask = DAG.getBitcast(VT, BitMask);
+ BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask,
+ SmallVector<int, 64>(NumElts, 0));
+
+ SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
+ DAG.getBitcast(ExtVT, R), BaseShAmt,
+ Subtarget, DAG);
+ Res = DAG.getBitcast(VT, Res);
+ Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);
+
+ if (Opcode == ISD::SRA) {
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask)
+ // SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW.
+ SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
+ SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask,
+ BaseShAmt, Subtarget, DAG);
+ SignMask = DAG.getBitcast(VT, SignMask);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
+ }
+ return Res;
+ }
+ }
}
// Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
@@ -23379,7 +24407,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
// AVX2 can more effectively perform this as a zext/trunc to/from v8i32.
if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) {
- SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue Z = DAG.getConstant(0, dl, VT);
SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z));
SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z));
Lo = convertShiftLeftToScale(Lo, dl, Subtarget, DAG);
@@ -23401,8 +24429,13 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
+ unsigned Opc = Op.getOpcode();
+ unsigned X86OpcV = getTargetVShiftUniformOpcode(Opc, true);
+ unsigned X86OpcI = getTargetVShiftUniformOpcode(Opc, false);
+
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");
@@ -23412,31 +24445,31 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
return V;
- if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode()))
+ if (SupportedVectorVarShift(VT, Subtarget, Opc))
return Op;
// XOP has 128-bit variable logical/arithmetic shifts.
// +ve/-ve Amt = shift left/right.
if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v8i16 || VT == MVT::v16i8)) {
- if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) {
+ if (Opc == ISD::SRL || Opc == ISD::SRA) {
SDValue Zero = DAG.getConstant(0, dl, VT);
Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
}
- if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL)
+ if (Opc == ISD::SHL || Opc == ISD::SRL)
return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
- if (Op.getOpcode() == ISD::SRA)
+ if (Opc == ISD::SRA)
return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
}
// 2i64 vector logical shifts can efficiently avoid scalarization - do the
// shifts per-lane and then shuffle the partial results back together.
- if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
+ if (VT == MVT::v2i64 && Opc != ISD::SRA) {
// Splat the shift amounts so the scalar shifts above will catch it.
SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
- SDValue R0 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt0);
- SDValue R1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt1);
+ SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
+ SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
@@ -23444,7 +24477,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// M = lshr(SIGN_MASK, Amt)
// ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
- Op.getOpcode() == ISD::SRA) {
+ Opc == ISD::SRA) {
SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
@@ -23489,36 +24522,34 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// Only perform this blend if we can perform it without loading a mask.
if (ShuffleMask.size() == NumElts && Amt1 && Amt2 &&
- isa<ConstantSDNode>(Amt1) && isa<ConstantSDNode>(Amt2) &&
(VT != MVT::v16i16 ||
is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) &&
- (VT == MVT::v4i32 || Subtarget.hasSSE41() ||
- Op.getOpcode() != ISD::SHL || canWidenShuffleElements(ShuffleMask))) {
- SDValue Splat1 =
- DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
- SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
- SDValue Splat2 =
- DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), dl, VT);
- SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
- return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
+ (VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL ||
+ canWidenShuffleElements(ShuffleMask))) {
+ auto *Cst1 = dyn_cast<ConstantSDNode>(Amt1);
+ auto *Cst2 = dyn_cast<ConstantSDNode>(Amt2);
+ if (Cst1 && Cst2 && Cst1->getAPIntValue().ult(EltSizeInBits) &&
+ Cst2->getAPIntValue().ult(EltSizeInBits)) {
+ SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
+ Cst1->getZExtValue(), DAG);
+ SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
+ Cst2->getZExtValue(), DAG);
+ return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
+ }
}
}
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
- if (Op.getOpcode() == ISD::SHL)
+ if (Opc == ISD::SHL)
if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
- // Constant ISD::SRL can be performed efficiently on vXi8/vXi16 vectors as we
+ // Constant ISD::SRL can be performed efficiently on vXi16 vectors as we
// can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt).
- // TODO: Improve support for the shift by zero special case.
- if (Op.getOpcode() == ISD::SRL && ConstantAmt &&
- ((Subtarget.hasSSE41() && VT == MVT::v8i16) ||
- DAG.isKnownNeverZero(Amt)) &&
- (VT == MVT::v16i8 || VT == MVT::v8i16 ||
- ((VT == MVT::v32i8 || VT == MVT::v16i16) && Subtarget.hasInt256()))) {
- SDValue EltBits = DAG.getConstant(VT.getScalarSizeInBits(), dl, VT);
+ if (Opc == ISD::SRL && ConstantAmt &&
+ (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256()))) {
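+    // For a constant amount 0 < Amt < 16, x >> Amt == mulhu(x, 1 << (16 - Amt))
+    // since the product x * 2^(16 - Amt) holds x >> Amt in its upper 16 bits;
+    // lanes with a zero shift amount need separate handling, as the scale
+    // would wrap to 0.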
+ SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
@@ -23528,13 +24559,36 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
}
+ // Constant ISD::SRA can be performed efficiently on vXi16 vectors as we
+ // can replace with ISD::MULHS, creating scale factor from (NumEltBits - Amt).
+ // TODO: Special case handling for shift by 0/1, really we can afford either
+ // of these cases in pre-SSE41/XOP/AVX512 but not both.
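+  // For 1 < Amt < 16, x >> Amt (arithmetic) == mulhs(x, 1 << (16 - Amt)).
+  // Amt == 0 would need a scale of 2^16 and Amt == 1 a scale of 2^15 (negative
+  // as an i16 immediate), so both are patched up with the Amt0/Amt1 selects
+  // below.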
+ if (Opc == ISD::SRA && ConstantAmt &&
+ (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256())) &&
+ ((Subtarget.hasSSE41() && !Subtarget.hasXOP() &&
+ !Subtarget.hasAVX512()) ||
+ DAG.isKnownNeverZero(Amt))) {
+ SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
+ SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
+ if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
+ SDValue Amt0 =
+ DAG.getSetCC(dl, VT, Amt, DAG.getConstant(0, dl, VT), ISD::SETEQ);
+ SDValue Amt1 =
+ DAG.getSetCC(dl, VT, Amt, DAG.getConstant(1, dl, VT), ISD::SETEQ);
+ SDValue Sra1 =
+ getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, 1, DAG);
+ SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale);
+ Res = DAG.getSelect(dl, VT, Amt0, R, Res);
+ return DAG.getSelect(dl, VT, Amt1, Sra1, Res);
+ }
+ }
+
// v4i32 Non Uniform Shifts.
// If the shift amount is constant we can shift each lane using the SSE2
// immediate shifts, else we need to zero-extend each lane to the lower i64
// and shift using the SSE2 variable shifts.
// The separate results can then be blended together.
if (VT == MVT::v4i32) {
- unsigned Opc = Op.getOpcode();
SDValue Amt0, Amt1, Amt2, Amt3;
if (ConstantAmt) {
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
@@ -23542,26 +24596,12 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
} else {
- // ISD::SHL is handled above but we include it here for completeness.
- switch (Opc) {
- default:
- llvm_unreachable("Unknown target vector shift node");
- case ISD::SHL:
- Opc = X86ISD::VSHL;
- break;
- case ISD::SRL:
- Opc = X86ISD::VSRL;
- break;
- case ISD::SRA:
- Opc = X86ISD::VSRA;
- break;
- }
// The SSE2 shifts use the lower i64 as the same shift amount for
// all lanes and the upper i64 is ignored. On AVX we're better off
// just zero-extending, but for SSE just duplicating the top 16-bits is
// cheaper and has the same effect for out of range values.
if (Subtarget.hasAVX()) {
- SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue Z = DAG.getConstant(0, dl, VT);
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
@@ -23581,10 +24621,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
}
- SDValue R0 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt0));
- SDValue R1 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt1));
- SDValue R2 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt2));
- SDValue R3 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt3));
+ unsigned ShOpc = ConstantAmt ? Opc : X86OpcV;
+ SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0));
+ SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1));
+ SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2));
+ SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3));
// Merge the shifted lane results optimally with/without PBLENDW.
// TODO - ideally shuffle combining would handle this.
@@ -23611,19 +24652,66 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
"Unexpected vector type");
MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32;
MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
- unsigned ExtOpc =
- Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ unsigned ExtOpc = Opc == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
+ DAG.getNode(Opc, dl, ExtVT, R, Amt));
+ }
+
+ // Constant ISD::SRA/SRL can be performed efficiently on vXi8 vectors as we
+ // extend to vXi16 to perform a MUL scale effectively as a MUL_LOHI.
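+  // Unpacking R with itself places each byte in both halves of an i16 lane,
+  // so a vXi16 shift right by 8 (logical or arithmetic) yields the zero- or
+  // sign-extended byte; multiplying by 1 << (8 - Amt) and taking the upper
+  // byte of the product then gives the per-byte shift result.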
+ if (ConstantAmt && (Opc == ISD::SRA || Opc == ISD::SRL) &&
+ (VT == MVT::v16i8 || VT == MVT::v64i8 ||
+ (VT == MVT::v32i8 && Subtarget.hasInt256())) &&
+ !Subtarget.hasXOP()) {
+ int NumElts = VT.getVectorNumElements();
+ SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8);
+
+ // Extend constant shift amount to vXi16 (it doesn't matter if the type
+ // isn't legal).
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
+ Amt = DAG.getZExtOrTrunc(Amt, dl, ExVT);
+ Amt = DAG.getNode(ISD::SUB, dl, ExVT, DAG.getConstant(8, dl, ExVT), Amt);
+ Amt = DAG.getNode(ISD::SHL, dl, ExVT, DAG.getConstant(1, dl, ExVT), Amt);
+ assert(ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()) &&
+ "Constant build vector expected");
+
+ if (VT == MVT::v16i8 && Subtarget.hasInt256()) {
+ R = Opc == ISD::SRA ? DAG.getSExtOrTrunc(R, dl, ExVT)
+ : DAG.getZExtOrTrunc(R, dl, ExVT);
+ R = DAG.getNode(ISD::MUL, dl, ExVT, R, Amt);
+ R = DAG.getNode(X86ISD::VSRLI, dl, ExVT, R, Cst8);
+ return DAG.getZExtOrTrunc(R, dl, VT);
+ }
+
+ SmallVector<SDValue, 16> LoAmt, HiAmt;
+ for (int i = 0; i != NumElts; i += 16) {
+ for (int j = 0; j != 8; ++j) {
+ LoAmt.push_back(Amt.getOperand(i + j));
+ HiAmt.push_back(Amt.getOperand(i + j + 8));
+ }
+ }
+
+ MVT VT16 = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ SDValue LoA = DAG.getBuildVector(VT16, dl, LoAmt);
+ SDValue HiA = DAG.getBuildVector(VT16, dl, HiAmt);
+
+ SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R));
+ SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R));
+ LoR = DAG.getNode(X86OpcI, dl, VT16, LoR, Cst8);
+ HiR = DAG.getNode(X86OpcI, dl, VT16, HiR, Cst8);
+ LoR = DAG.getNode(ISD::MUL, dl, VT16, LoR, LoA);
+ HiR = DAG.getNode(ISD::MUL, dl, VT16, HiR, HiA);
+ LoR = DAG.getNode(X86ISD::VSRLI, dl, VT16, LoR, Cst8);
+ HiR = DAG.getNode(X86ISD::VSRLI, dl, VT16, HiR, Cst8);
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR);
}
if (VT == MVT::v16i8 ||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
- unsigned ShiftOpcode = Op->getOpcode();
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (VT.is512BitVector()) {
@@ -23648,7 +24736,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
// and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
- SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl);
+ SDValue Z = DAG.getConstant(0, dl, SelVT);
SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
return DAG.getSelect(dl, SelVT, C, V0, V1);
};
@@ -23657,49 +24745,46 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
Amt = DAG.getBitcast(ExtVT, Amt);
- Amt = DAG.getNode(ISD::SHL, dl, ExtVT, Amt, DAG.getConstant(5, dl, ExtVT));
+ Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, Amt, 5, DAG);
Amt = DAG.getBitcast(VT, Amt);
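+    // After the shift by 5 the most significant of the three amount bits sits
+    // in each byte's sign bit, which SignBitSelect tests; each 'a += a' step
+    // below moves the next amount bit into the sign position.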
- if (Op->getOpcode() == ISD::SHL || Op->getOpcode() == ISD::SRL) {
+ if (Opc == ISD::SHL || Opc == ISD::SRL) {
// r = VSELECT(r, shift(r, 4), a);
- SDValue M =
- DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
return R;
}
- if (Op->getOpcode() == ISD::SRA) {
+ if (Opc == ISD::SRA) {
// For SRA we need to unpack each byte to the higher byte of a i16 vector
// so we can correctly sign extend. We don't care what happens to the
// lower byte.
- SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), Amt);
- SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), Amt);
- SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), R);
- SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), R);
+ SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
+ SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
+ SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R);
+ SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
// r = VSELECT(r, shift(r, 4), a);
- SDValue MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(4, dl, ExtVT));
- SDValue MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(4, dl, ExtVT));
+ SDValue MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 4, DAG);
+ SDValue MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 4, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
@@ -23708,10 +24793,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 2), a);
- MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(2, dl, ExtVT));
- MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(2, dl, ExtVT));
+ MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 2, DAG);
+ MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 2, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
@@ -23720,45 +24803,38 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 1), a);
- MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(1, dl, ExtVT));
- MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(1, dl, ExtVT));
+ MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 1, DAG);
+ MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 1, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// Logical shift the result back to the lower byte, leaving a zero upper
- // byte
- // meaning that we can safely pack with PACKUSWB.
- RLo =
- DAG.getNode(ISD::SRL, dl, ExtVT, RLo, DAG.getConstant(8, dl, ExtVT));
- RHi =
- DAG.getNode(ISD::SRL, dl, ExtVT, RHi, DAG.getConstant(8, dl, ExtVT));
+ // byte meaning that we can safely pack with PACKUSWB.
+ RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RLo, 8, DAG);
+ RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RHi, 8, DAG);
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
}
if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
MVT ExtVT = MVT::v8i32;
- SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
- SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
- SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
- SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Z, R);
- SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Z, R);
+ SDValue Z = DAG.getConstant(0, dl, VT);
+ SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z);
+ SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z);
+ SDValue RLo = getUnpackl(DAG, dl, VT, Z, R);
+ SDValue RHi = getUnpackh(DAG, dl, VT, Z, R);
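+    // R is unpacked into the high i16 of each i32 lane and Amt, zero extended
+    // by unpacking with zero, into the low i16; the i32 shifts leave the
+    // 16-bit results in the high half, which the logical shift by 16 and
+    // PACKUS below move back into a v16i16.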
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
- SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo);
- SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi);
- Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
- Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT));
+ SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo);
+ SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi);
+ Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Lo, 16, DAG);
+ Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Hi, 16, DAG);
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
if (VT == MVT::v8i16) {
- unsigned ShiftOpcode = Op->getOpcode();
-
// If we have a constant shift amount, the non-SSE41 path is best as
// avoiding bitcasts make it easier to constant fold and reduce to PBLENDW.
bool UseSSE41 = Subtarget.hasSSE41() &&
@@ -23778,7 +24854,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// set all bits of the lanes to true and VSELECT uses that in
// its OR(AND(V0,C),AND(V1,~C)) lowering.
SDValue C =
- DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT));
+ getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Sel, 15, DAG);
return DAG.getSelect(dl, VT, C, V0, V1);
};
@@ -23788,42 +24864,42 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// bytes for PBLENDVB.
Amt = DAG.getNode(
ISD::OR, dl, VT,
- DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(4, dl, VT)),
- DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT)));
+ getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 4, DAG),
+ getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG));
} else {
- Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT));
+ Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG);
}
// r = VSELECT(r, shift(r, 8), a);
- SDValue M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(8, dl, VT));
+ SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 4), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 4, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 2, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 1, DAG);
R = SignBitSelect(Amt, M, R);
return R;
}
- // Decompose 256-bit shifts into smaller 128-bit shifts.
+ // Decompose 256-bit shifts into 128-bit shifts.
if (VT.is256BitVector())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
return SDValue();
}
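
The staged lowering above exists because SSE has no per-element variable shift for vXi8 (and, pre-SSE41, none for v8i16): the shift amount is scaled so each of its bits lands in the sign-bit position in turn, and a sign-bit VSELECT picks between "r" and "r shifted by a fixed 8/4/2/1". A minimal standalone sketch of that idea in scalar C++, not taken from this patch; shl8_staged and the test driver are illustrative names only:

#include <cassert>
#include <cstdint>

// Shift an 8-bit value left by Amt (0..7) using only fixed shifts by 4/2/1,
// selecting each stage by the corresponding bit of the amount - the scalar
// analogue of the "r = VSELECT(r, shift(r, N), a)" chain in the code above.
static uint8_t shl8_staged(uint8_t R, uint8_t Amt) {
  if (Amt & 4) R = (uint8_t)(R << 4);
  if (Amt & 2) R = (uint8_t)(R << 2);
  if (Amt & 1) R = (uint8_t)(R << 1);
  return R;
}

int main() {
  for (unsigned R = 0; R != 256; ++R)
    for (unsigned A = 0; A != 8; ++A)
      assert(shl8_staged((uint8_t)R, (uint8_t)A) == (uint8_t)(R << A));
  return 0;
}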
@@ -23838,20 +24914,31 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ int NumElts = VT.getVectorNumElements();
+
+ // Check for constant splat rotation amount.
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ int CstSplatIndex = -1;
+ if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits))
+ for (int i = 0; i != NumElts; ++i)
+ if (!UndefElts[i]) {
+ if (CstSplatIndex < 0 || EltBits[i] == EltBits[CstSplatIndex]) {
+ CstSplatIndex = i;
+ continue;
+ }
+ CstSplatIndex = -1;
+ break;
+ }
+ // AVX512 implicitly uses modulo rotation amounts.
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
// Attempt to rotate by immediate.
- APInt UndefElts;
- SmallVector<APInt, 16> EltBits;
- if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits)) {
- if (!UndefElts && llvm::all_of(EltBits, [EltBits](APInt &V) {
- return EltBits[0] == V;
- })) {
- unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
- uint64_t RotateAmt = EltBits[0].urem(EltSizeInBits);
- return DAG.getNode(Op, DL, VT, R,
- DAG.getConstant(RotateAmt, DL, MVT::i8));
- }
+ if (0 <= CstSplatIndex) {
+ unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
+ uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
+ return DAG.getNode(Op, DL, VT, R,
+ DAG.getConstant(RotateAmt, DL, MVT::i8));
}
// Else, fall-back on VPROLV/VPRORV.
@@ -23862,20 +24949,17 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
+ // XOP implicitly uses modulo rotation amounts.
if (Subtarget.hasXOP()) {
- // Split 256-bit integers.
if (VT.is256BitVector())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
// Attempt to rotate by immediate.
- if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
- if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
- uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
- assert(RotateAmt < EltSizeInBits && "Rotation out of range");
- return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
- DAG.getConstant(RotateAmt, DL, MVT::i8));
- }
+ if (0 <= CstSplatIndex) {
+ uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
+ return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
+ DAG.getConstant(RotateAmt, DL, MVT::i8));
}
// Use general rotate by variable (per-element).
@@ -23884,7 +24968,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Split 256-bit integers on pre-AVX2 targets.
if (VT.is256BitVector() && !Subtarget.hasAVX2())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
@@ -23892,44 +24976,19 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
"Only vXi32/vXi16/vXi8 vector rotates supported");
  // Rotate by a uniform constant - expand back to shifts.
- // TODO - legalizers should be able to handle this.
- if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
- if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
- uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
- assert(RotateAmt < EltSizeInBits && "Rotation out of range");
- if (RotateAmt == 0)
- return R;
-
- SDValue AmtR = DAG.getConstant(EltSizeInBits - RotateAmt, DL, VT);
- SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
- return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
- }
- }
+ if (0 <= CstSplatIndex)
+ return SDValue();
- // Rotate by splat - expand back to shifts.
- // TODO - legalizers should be able to handle this.
- if ((EltSizeInBits >= 16 || Subtarget.hasBWI()) &&
- IsSplatValue(VT, Amt, DL, DAG, Subtarget, Opcode)) {
- SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
- AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
- SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
- return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
- }
+ bool IsSplatAmt = DAG.isSplatValue(Amt);
// v16i8/v32i8: Split rotation into rot4/rot2/rot1 stages and select by
// the amount bit.
- if (EltSizeInBits == 8) {
- if (Subtarget.hasBWI()) {
- SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
- AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
- SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
- return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
- }
+ if (EltSizeInBits == 8 && !IsSplatAmt) {
+ if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()))
+ return SDValue();
- MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
+ // We don't need ModuloAmt here as we just peek at individual bits.
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (Subtarget.hasSSE41()) {
@@ -23943,7 +25002,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
// and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
- SDValue Z = getZeroVector(SelVT, Subtarget, DAG, DL);
+ SDValue Z = DAG.getConstant(0, DL, SelVT);
SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel);
return DAG.getSelect(DL, SelVT, C, V0, V1);
};
@@ -23984,14 +25043,17 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return SignBitSelect(VT, Amt, M, R);
}
+ // ISD::ROT* uses modulo rotate amounts.
+ Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
+ DAG.getConstant(EltSizeInBits - 1, DL, VT));
+
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
SupportedVectorVarShift(VT, Subtarget, ISD::SRL);
- // Best to fallback for all supported variable shifts.
- // AVX2 - best to fallback for non-constants as well.
- // TODO - legalizers should be able to handle this.
- if (LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
+ // Fallback for splats + all supported variable shifts.
+  // Fallback for non-constant AVX2 vXi16 as well.
+ if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
@@ -24032,78 +25094,6 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
DAG.getVectorShuffle(VT, DL, Res02, Res13, {1, 5, 3, 7}));
}
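
The non-XOP rotate path above first masks the amount with EltSizeInBits - 1 (ISD::ROTL/ROTR take modulo amounts) and then, in the splat/variable-shift fallback, expands the rotate to (R << Amt) | (R >> (EltSize - Amt)). A scalar sketch of that expansion, assuming 32-bit elements; rotl32 is an illustrative name, and the Amt == 0 early return stands in for the vector case, where an x86 shift by a full element width simply produces zero:

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned Amt) {
  Amt &= 31;                       // modulo rotate amount, as in the AND above
  if (Amt == 0)
    return X;                      // avoid the out-of-range scalar shift
  return (X << Amt) | (X >> (32 - Amt));
}

int main() {
  assert(rotl32(0x80000001u, 1) == 0x00000003u);
  assert(rotl32(0x12345678u, 32) == 0x12345678u); // amount taken mod 32
  return 0;
}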
-static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
- // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
- // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
- // looks for this combo and may remove the "setcc" instruction if the "setcc"
- // has only one use.
- SDNode *N = Op.getNode();
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- unsigned BaseOp = 0;
- X86::CondCode Cond;
- SDLoc DL(Op);
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Unknown ovf instruction!");
- case ISD::SADDO:
- // A subtract of one will be selected as a INC. Note that INC doesn't
- // set CF, so we can't do this for UADDO.
- if (isOneConstant(RHS)) {
- BaseOp = X86ISD::INC;
- Cond = X86::COND_O;
- break;
- }
- BaseOp = X86ISD::ADD;
- Cond = X86::COND_O;
- break;
- case ISD::UADDO:
- BaseOp = X86ISD::ADD;
- Cond = X86::COND_B;
- break;
- case ISD::SSUBO:
- // A subtract of one will be selected as a DEC. Note that DEC doesn't
- // set CF, so we can't do this for USUBO.
- if (isOneConstant(RHS)) {
- BaseOp = X86ISD::DEC;
- Cond = X86::COND_O;
- break;
- }
- BaseOp = X86ISD::SUB;
- Cond = X86::COND_O;
- break;
- case ISD::USUBO:
- BaseOp = X86ISD::SUB;
- Cond = X86::COND_B;
- break;
- case ISD::SMULO:
- BaseOp = N->getValueType(0) == MVT::i8 ? X86ISD::SMUL8 : X86ISD::SMUL;
- Cond = X86::COND_O;
- break;
- case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
- if (N->getValueType(0) == MVT::i8) {
- BaseOp = X86ISD::UMUL8;
- Cond = X86::COND_O;
- break;
- }
- SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
- MVT::i32);
- SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
-
- SDValue SetCC = getSETCC(X86::COND_O, SDValue(Sum.getNode(), 2), DL, DAG);
-
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
- }
- }
-
- // Also sets EFLAGS.
- SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
- SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
-
- SDValue SetCC = getSETCC(Cond, SDValue(Sum.getNode(), 1), DL, DAG);
-
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
-}
-
/// Returns true if the operand type is exactly twice the native width, and
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
@@ -24246,7 +25236,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
+ SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
DAG.getTargetConstant(1, dl, MVT::i8), // Scale
@@ -24256,7 +25246,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
Zero,
Chain
};
- SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
+ SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, dl, MVT::Other, Ops);
return SDValue(Res, 0);
}
@@ -24369,40 +25359,32 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
SrcVT == MVT::i64) {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- if (DstVT != MVT::f64)
+ if (DstVT != MVT::f64 && DstVT != MVT::i64 &&
+ !(DstVT == MVT::x86mmx && SrcVT.isVector()))
// This conversion needs to be expanded.
return SDValue();
- SmallVector<SDValue, 16> Elts;
SDLoc dl(Op);
- unsigned NumElts;
- MVT SVT;
if (SrcVT.isVector()) {
- NumElts = SrcVT.getVectorNumElements();
- SVT = SrcVT.getVectorElementType();
-
      // Widen the input vector in the case of MVT::v2i32.
// Example: from MVT::v2i32 to MVT::v4i32.
- for (unsigned i = 0, e = NumElts; i != e; ++i)
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Src,
- DAG.getIntPtrConstant(i, dl)));
+ MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
+ SrcVT.getVectorNumElements() * 2);
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
+ DAG.getUNDEF(SrcVT));
} else {
assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
"Unexpected source type in LowerBITCAST");
- Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
- DAG.getIntPtrConstant(0, dl)));
- Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
- DAG.getIntPtrConstant(1, dl)));
- NumElts = 2;
- SVT = MVT::i32;
- }
- // Explicitly mark the extra elements as Undef.
- Elts.append(NumElts, DAG.getUNDEF(SVT));
-
- EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
- SDValue BV = DAG.getBuildVector(NewVT, dl, Elts);
- SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
+ Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
+ }
+
+ MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
+ Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src);
+
+ if (DstVT == MVT::x86mmx)
+ return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src,
DAG.getIntPtrConstant(0, dl));
}
@@ -24445,7 +25427,7 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
  // The PSADBW instruction horizontally adds all bytes and leaves the result
  // in i64 chunks, thus directly computing the pop count for v2i64 and v4i64.
if (EltVT == MVT::i64) {
- SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
+ SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
return DAG.getBitcast(VT, V);
@@ -24457,13 +25439,13 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
// this is that it lines up the results of two PSADBW instructions to be
// two v2i64 vectors which concatenated are the 4 population counts. We can
// then use PACKUSWB to shrink and concatenate them into a v4i32 again.
- SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL);
+ SDValue Zeros = DAG.getConstant(0, DL, VT);
SDValue V32 = DAG.getBitcast(VT, V);
- SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V32, Zeros);
- SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V32, Zeros);
+ SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros);
+ SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);
// Do the horizontal sums into two v2i64s.
- Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
+ Zeros = DAG.getConstant(0, DL, ByteVecVT);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, Low), Zeros);
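
LowerHorizontalByteSum leans on PSADBW with a zero operand: the sum of absolute differences between eight byte lanes and zero is simply the sum of those eight bytes, delivered as a single 64-bit lane. A scalar model of one such lane, not an intrinsic and not part of this patch; psadbw_vs_zero is an illustrative name:

#include <cassert>
#include <cstdint>

// Model one 64-bit lane of PSADBW(x, 0): |b - 0| == b, so the result is just
// the byte sum, which is why it serves as the horizontal step for CTPOP.
static uint64_t psadbw_vs_zero(const uint8_t Bytes[8]) {
  uint64_t Sum = 0;
  for (int i = 0; i != 8; ++i)
    Sum += Bytes[i];
  return Sum;
}

int main() {
  const uint8_t B[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  assert(psadbw_vs_zero(B) == 36);
  return 0;
}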
@@ -24498,7 +25480,9 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
- unsigned VecSize = VT.getSizeInBits();
+ int NumElts = VT.getVectorNumElements();
+ (void)EltVT;
+ assert(EltVT == MVT::i8 && "Only vXi8 vector CTPOP lowering supported.");
// Implement a lookup table in register by using an algorithm based on:
// http://wm.ite.pl/articles/sse-popcount.html
@@ -24510,109 +25494,30 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
// masked out higher ones) for each byte. PSHUFB is used separately with both
// to index the in-register table. Next, both are added and the result is a
// i8 vector where each element contains the pop count for input byte.
- //
- // To obtain the pop count for elements != i8, we follow up with the same
- // approach and use additional tricks as described below.
- //
const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
/* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
/* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
/* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4};
- int NumByteElts = VecSize / 8;
- MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts);
- SDValue In = DAG.getBitcast(ByteVecVT, Op);
SmallVector<SDValue, 64> LUTVec;
- for (int i = 0; i < NumByteElts; ++i)
+ for (int i = 0; i < NumElts; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
- SDValue InRegLUT = DAG.getBuildVector(ByteVecVT, DL, LUTVec);
- SDValue M0F = DAG.getConstant(0x0F, DL, ByteVecVT);
+ SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec);
+ SDValue M0F = DAG.getConstant(0x0F, DL, VT);
// High nibbles
- SDValue FourV = DAG.getConstant(4, DL, ByteVecVT);
- SDValue HighNibbles = DAG.getNode(ISD::SRL, DL, ByteVecVT, In, FourV);
+ SDValue FourV = DAG.getConstant(4, DL, VT);
+ SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV);
// Low nibbles
- SDValue LowNibbles = DAG.getNode(ISD::AND, DL, ByteVecVT, In, M0F);
+ SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F);
  // The input vector is used as the shuffle mask that indexes elements into
  // the LUT. After counting low and high nibbles, add the two results to
  // obtain the final pop count per i8 element.
- SDValue HighPopCnt =
- DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles);
- SDValue LowPopCnt =
- DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles);
- SDValue PopCnt = DAG.getNode(ISD::ADD, DL, ByteVecVT, HighPopCnt, LowPopCnt);
-
- if (EltVT == MVT::i8)
- return PopCnt;
-
- return LowerHorizontalByteSum(PopCnt, VT, Subtarget, DAG);
-}
-
-static SDValue LowerVectorCTPOPBitmath(SDValue Op, const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
- assert(VT.is128BitVector() &&
- "Only 128-bit vector bitmath lowering supported.");
-
- int VecSize = VT.getSizeInBits();
- MVT EltVT = VT.getVectorElementType();
- int Len = EltVT.getSizeInBits();
-
- // This is the vectorized version of the "best" algorithm from
- // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
- // with a minor tweak to use a series of adds + shifts instead of vector
- // multiplications. Implemented for all integer vector types. We only use
- // this when we don't have SSSE3 which allows a LUT-based lowering that is
- // much faster, even faster than using native popcnt instructions.
-
- auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) {
- MVT VT = V.getSimpleValueType();
- SDValue ShifterV = DAG.getConstant(Shifter, DL, VT);
- return DAG.getNode(OpCode, DL, VT, V, ShifterV);
- };
- auto GetMask = [&](SDValue V, APInt Mask) {
- MVT VT = V.getSimpleValueType();
- SDValue MaskV = DAG.getConstant(Mask, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, V, MaskV);
- };
-
- // We don't want to incur the implicit masks required to SRL vNi8 vectors on
- // x86, so set the SRL type to have elements at least i16 wide. This is
- // correct because all of our SRLs are followed immediately by a mask anyways
- // that handles any bits that sneak into the high bits of the byte elements.
- MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16);
-
- SDValue V = Op;
-
- // v = v - ((v >> 1) & 0x55555555...)
- SDValue Srl =
- DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 1));
- SDValue And = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x55)));
- V = DAG.getNode(ISD::SUB, DL, VT, V, And);
-
- // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
- SDValue AndLHS = GetMask(V, APInt::getSplat(Len, APInt(8, 0x33)));
- Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 2));
- SDValue AndRHS = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x33)));
- V = DAG.getNode(ISD::ADD, DL, VT, AndLHS, AndRHS);
-
- // v = (v + (v >> 4)) & 0x0F0F0F0F...
- Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 4));
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, V, Srl);
- V = GetMask(Add, APInt::getSplat(Len, APInt(8, 0x0F)));
-
- // At this point, V contains the byte-wise population count, and we are
- // merely doing a horizontal sum if necessary to get the wider element
- // counts.
- if (EltVT == MVT::i8)
- return V;
-
- return LowerHorizontalByteSum(
- DAG.getBitcast(MVT::getVectorVT(MVT::i8, VecSize / 8), V), VT, Subtarget,
- DAG);
+ SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles);
+ SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles);
+ return DAG.getNode(ISD::ADD, DL, VT, HiPopCnt, LoPopCnt);
}
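
The in-register LUT above counts bits per byte by splitting every byte into two nibbles, looking each nibble up in the 16-entry table (done with PSHUFB in the vector code) and adding the two lookups. A scalar sketch of the same table, assuming plain C++; popcnt8_lut is an illustrative name:

#include <cassert>
#include <cstdint>

static int popcnt8_lut(uint8_t V) {
  // Same 16-entry table as the LUT[] array above: pop count of each nibble.
  static const int LUT[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                              1, 2, 2, 3, 2, 3, 3, 4};
  return LUT[V >> 4] + LUT[V & 0x0F];  // high-nibble count + low-nibble count
}

int main() {
  assert(popcnt8_lut(0x00) == 0);
  assert(popcnt8_lut(0xA5) == 4);
  assert(popcnt8_lut(0xFF) == 8);
  return 0;
}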
// Please ensure that any codegen change from LowerVectorCTPOP is reflected in
@@ -24638,12 +25543,6 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
}
}
- if (!Subtarget.hasSSSE3()) {
- // We can't use the fast LUT approach, so fall back on vectorized bitmath.
- assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!");
- return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG);
- }
-
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntUnary(Op, DAG);
@@ -24652,6 +25551,18 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is512BitVector() && !Subtarget.hasBWI())
return Lower512IntUnary(Op, DAG);
+ // For element types greater than i8, do vXi8 pop counts and a bytesum.
+ if (VT.getScalarType() != MVT::i8) {
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ SDValue ByteOp = DAG.getBitcast(ByteVT, Op0);
+ SDValue PopCnt8 = DAG.getNode(ISD::CTPOP, DL, ByteVT, ByteOp);
+ return LowerHorizontalByteSum(PopCnt8, VT, Subtarget, DAG);
+ }
+
+ // We can't use the fast LUT approach, so fall back on LegalizeDAG.
+ if (!Subtarget.hasSSSE3())
+ return SDValue();
+
return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}
@@ -24759,8 +25670,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
}
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- bool AllowIncDec = true) {
+ const X86Subtarget &Subtarget) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
@@ -24784,25 +25694,6 @@ static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
- if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
- // Convert to inc/dec if they aren't slow or we are optimizing for size.
- if (AllowIncDec && (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- if ((NewOpc == X86ISD::LADD && C->isOne()) ||
- (NewOpc == X86ISD::LSUB && C->isAllOnesValue()))
- return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N),
- DAG.getVTList(MVT::i32, MVT::Other),
- {N->getOperand(0), N->getOperand(1)},
- /*MemVT=*/N->getSimpleValueType(0), MMO);
- if ((NewOpc == X86ISD::LSUB && C->isOne()) ||
- (NewOpc == X86ISD::LADD && C->isAllOnesValue()))
- return DAG.getMemIntrinsicNode(X86ISD::LDEC, SDLoc(N),
- DAG.getVTList(MVT::i32, MVT::Other),
- {N->getOperand(0), N->getOperand(1)},
- /*MemVT=*/N->getSimpleValueType(0), MMO);
- }
- }
-
return DAG.getMemIntrinsicNode(
NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1), N->getOperand(2)},
@@ -25120,8 +26011,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
// VLX the vector should be widened to 512 bit
unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
- SDValue Src0 = N->getSrc0();
- Src0 = ExtendToType(Src0, WideDataVT, DAG);
+ SDValue PassThru = ExtendToType(N->getPassThru(), WideDataVT, DAG);
// Mask element has to be i1.
assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
@@ -25131,7 +26021,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
- N->getBasePtr(), Mask, Src0,
+ N->getBasePtr(), Mask, PassThru,
N->getMemoryVT(), N->getMemOperand(),
N->getExtensionType(),
N->isExpandingLoad());
@@ -25194,7 +26084,7 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
MVT VT = Op.getSimpleValueType();
SDValue Index = N->getIndex();
SDValue Mask = N->getMask();
- SDValue Src0 = N->getValue();
+ SDValue PassThru = N->getPassThru();
MVT IndexVT = Index.getSimpleValueType();
MVT MaskVT = Mask.getSimpleValueType();
@@ -25219,12 +26109,12 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
- Src0 = ExtendToType(Src0, VT, DAG);
+ PassThru = ExtendToType(PassThru, VT, DAG);
Index = ExtendToType(Index, IndexVT, DAG);
Mask = ExtendToType(Mask, MaskVT, DAG, true);
}
- SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index,
+ SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
N->getScale() };
SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(),
@@ -25308,6 +26198,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
+ case ISD::FSHL:
+ case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
@@ -25322,6 +26214,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
+ case ISD::FADD:
+ case ISD::FSUB: return lowerFaddFsub(Op, DAG, Subtarget);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -25354,12 +26248,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
case ISD::CTTZ:
- case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, DAG);
+ case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, Subtarget, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::MULHS:
case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
- case ISD::UMUL_LOHI:
- case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
case ISD::ROTL:
case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
case ISD::SRA:
@@ -25376,12 +26268,16 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
- case ISD::SUB: return LowerADD_SUB(Op, DAG);
+ case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget);
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, DAG);
- case ISD::ABS: return LowerABS(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
@@ -25421,32 +26317,70 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::MUL: {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "Unexpected VT");
+ if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger &&
+ VT.getVectorNumElements() == 2) {
+ // Promote to a pattern that will be turned into PMULUDQ.
+ SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
+ N->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
+ N->getOperand(1));
+ SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul));
+ } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ VT.getVectorElementType() == MVT::i8) {
+ // Pre-promote these to vXi16 to avoid op legalization thinking all 16
+ // elements are needed.
+ MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
+ SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ unsigned NumConcats = 16 / VT.getVectorNumElements();
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
+ Results.push_back(Res);
+ }
+ return;
+  }
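
The v2i32 MUL case above any-extends the operands to v2i64, multiplies (a pattern isel turns into PMULUDQ) and truncates back. That is sound because the low 32 bits of a product depend only on the low 32 bits of the operands; a scalar sketch of that equivalence, with mul_lo_via_64 as an illustrative name:

#include <cassert>
#include <cstdint>

static uint32_t mul_lo_via_64(uint32_t A, uint32_t B) {
  uint64_t Wide = (uint64_t)A * (uint64_t)B;   // the PMULUDQ-style widening
  return (uint32_t)Wide;                       // truncate back to 32 bits
}

int main() {
  assert(mul_lo_via_64(0xFFFFFFFFu, 2u) == 0xFFFFFFFEu);
  assert(mul_lo_via_64(0x10001u, 0x10001u) == 0x00020001u);
  return 0;
}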
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ case X86ISD::VPMADDWD:
case X86ISD::AVG: {
- // Legalize types for X86ISD::AVG by expanding vectors.
+ // Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
+ // X86ISD::AVG/VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- auto InVT = N->getValueType(0);
- assert(InVT.getSizeInBits() < 128);
- assert(128 % InVT.getSizeInBits() == 0);
+ EVT VT = N->getValueType(0);
+ EVT InVT = N->getOperand(0).getValueType();
+ assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 &&
+ "Expected a VT that divides into 128 bits.");
unsigned NumConcat = 128 / InVT.getSizeInBits();
- EVT RegVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(),
- NumConcat * InVT.getVectorNumElements());
+ EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(),
+ NumConcat * InVT.getVectorNumElements());
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ NumConcat * VT.getVectorNumElements());
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = N->getOperand(0);
- SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+ SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
Ops[0] = N->getOperand(1);
- SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+ SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
- SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1);
- if (getTypeAction(*DAG.getContext(), InVT) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
@@ -25456,7 +26390,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
    // setCC result type is v2i1 because type legalization will end up with
// a v4i1 setcc plus an extend.
assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type");
- if (N->getOperand(0).getValueType() != MVT::v2f32)
+ if (N->getOperand(0).getValueType() != MVT::v2f32 ||
+ getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector)
return;
SDValue UNDEF = DAG.getUNDEF(MVT::v2f32);
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
@@ -25465,9 +26400,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
N->getOperand(1), UNDEF);
SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS,
N->getOperand(2));
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
+ DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
}
@@ -25489,13 +26423,198 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
- case ISD::UREM:
+ case ISD::UREM: {
+ EVT VT = N->getValueType(0);
+ if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector) {
+      // If this RHS is a constant splat vector we can widen this and let the
+      // division/remainder-by-constant optimization handle it.
+ // TODO: Can we do something for non-splat?
+ APInt SplatVal;
+ if (ISD::isConstantSplatVector(N->getOperand(1).getNode(), SplatVal)) {
+ unsigned NumConcats = 128 / VT.getSizeInBits();
+ SmallVector<SDValue, 8> Ops0(NumConcats, DAG.getUNDEF(VT));
+ Ops0[0] = N->getOperand(0);
+ EVT ResVT = getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Ops0);
+ SDValue N1 = DAG.getConstant(SplatVal, dl, ResVT);
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1);
+ Results.push_back(Res);
+ }
+ return;
+ }
+
+ if (VT == MVT::v2i32) {
+      // Legalize v2i32 div/rem by unrolling. Otherwise we would promote to
+      // v2i64 and unroll later, but then we create i64 scalar ops which
+ // might be slow in 64-bit mode or require a libcall in 32-bit mode.
+ Results.push_back(DAG.UnrollVectorOp(N));
+ return;
+ }
+
+ if (VT.isVector())
+ return;
+
+ LLVM_FALLTHROUGH;
+ }
case ISD::SDIVREM:
case ISD::UDIVREM: {
SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
Results.push_back(V);
return;
}
+ case ISD::TRUNCATE: {
+ MVT VT = N->getSimpleValueType(0);
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+
+ // The generic legalizer will try to widen the input type to the same
+ // number of elements as the widened result type. But this isn't always
+ // the best thing so do some custom legalization to avoid some cases.
+ MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+
+ unsigned InBits = InVT.getSizeInBits();
+ if (128 % InBits == 0) {
+      // 128 bit and smaller inputs should avoid truncate altogether and
+ // just use a build_vector that will become a shuffle.
+ // TODO: Widen and use a shuffle directly?
+ MVT InEltVT = InVT.getSimpleVT().getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ // Use the original element count so we don't do more scalar opts than
+ // necessary.
+ unsigned MinElts = VT.getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In,
+ DAG.getIntPtrConstant(i, dl));
+ Ops[i] = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Val);
+ }
+ Results.push_back(DAG.getBuildVector(WidenVT, dl, Ops));
+ return;
+ }
+ // With AVX512 there are some cases that can use a target specific
+ // truncate node to go from 256/512 to less than 128 with zeros in the
+ // upper elements of the 128 bit result.
+ if (Subtarget.hasAVX512() && isTypeLegal(InVT)) {
+      // We can use VTRUNC directly for 256 bits with VLX or for any 512 bits.
+ if ((InBits == 256 && Subtarget.hasVLX()) || InBits == 512) {
+ Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In));
+ return;
+ }
+ // There's one case we can widen to 512 bits and use VTRUNC.
+ if (InVT == MVT::v4i64 && VT == MVT::v4i8 && isTypeLegal(MVT::v8i64)) {
+ In = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i64, In,
+ DAG.getUNDEF(MVT::v4i64));
+ Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In));
+ return;
+ }
+ }
+ return;
+ }
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ if (ExperimentalVectorWideningLegalization)
+ return;
+
+ EVT VT = N->getValueType(0);
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+ if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
+ (InVT == MVT::v16i16 || InVT == MVT::v32i8)) {
+      // Custom split this so we can extend i8/i16->i32 invec. This is better
+      // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
+      // sra, then an extend from i32 to i64 using pcmpgt. By custom splitting
+      // we allow the sra from the extend to i32 to be shared by the split.
+ EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(),
+ InVT.getVectorNumElements() / 2);
+ MVT ExtendVT = MVT::getVectorVT(MVT::i32,
+ VT.getVectorNumElements());
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT,
+ In, DAG.getIntPtrConstant(0, dl));
+ In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In);
+
+ // Fill a vector with sign bits for each element.
+ SDValue Zero = DAG.getConstant(0, dl, ExtendVT);
+ SDValue SignBits = DAG.getSetCC(dl, ExtendVT, Zero, In, ISD::SETGT);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // Create an unpackl and unpackh to interleave the sign bits then bitcast
+ // to vXi64.
+ SDValue Lo = getUnpackl(DAG, dl, ExtendVT, In, SignBits);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ SDValue Hi = getUnpackh(DAG, dl, ExtendVT, In, SignBits);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ return;
+ }
+ return;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ if (!ExperimentalVectorWideningLegalization)
+ return;
+
+ EVT VT = N->getValueType(0);
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+ if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
+ (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
+      // Custom split this so we can extend i8/i16->i32 invec. This is better
+      // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
+      // sra, then an extend from i32 to i64 using pcmpgt. By custom splitting
+      // we allow the sra from the extend to i32 to be shared by the split.
+ In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
+
+ // Fill a vector with sign bits for each element.
+ SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
+ SDValue SignBits = DAG.getSetCC(dl, MVT::v4i32, Zero, In, ISD::SETGT);
+
+ // Create an unpackl and unpackh to interleave the sign bits then bitcast
+ // to v2i64.
+ SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
+ {0, 4, 1, 5});
+ Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo);
+ SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
+ {2, 6, 3, 7});
+ Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ return;
+ }
+
+ if ((VT == MVT::v16i32 || VT == MVT::v8i64) && InVT.is128BitVector()) {
+ // Perform custom splitting instead of the two stage extend we would get
+ // by default.
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ assert(isTypeLegal(LoVT) && "Split VT not legal?");
+
+ bool IsSigned = N->getOpcode() == ISD::SIGN_EXTEND;
+
+ SDValue Lo = getExtendInVec(IsSigned, dl, LoVT, In, DAG);
+
+ // We need to shift the input over by half the number of elements.
+ unsigned NumElts = InVT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ SmallVector<int, 16> ShufMask(NumElts, SM_SentinelUndef);
+ for (unsigned i = 0; i != HalfNumElts; ++i)
+ ShufMask[i] = i + HalfNumElts;
+
+ SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
+ Hi = getExtendInVec(IsSigned, dl, HiVT, Hi, DAG);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ }
+ return;
+ }
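
The custom SIGN_EXTEND split above produces the upper word of every i64 result from a "0 > x" compare (all ones for negative inputs, all zeros otherwise) and interleaves it with the original i32. A scalar sketch of the resulting bit pattern, assuming the low-word-first lane order the unpacks produce; sext_via_signbits is an illustrative name:

#include <cassert>
#include <cstdint>

static int64_t sext_via_signbits(int32_t X) {
  uint32_t Lo = (uint32_t)X;
  uint32_t Hi = (0 > X) ? 0xFFFFFFFFu : 0u;   // the PCMPGT-style sign fill
  uint64_t Bits = ((uint64_t)Hi << 32) | Lo;  // low word first, sign word above
  return (int64_t)Bits;
}

int main() {
  assert(sext_via_signbits(-1) == -1);
  assert(sext_via_signbits(-123456) == -123456);
  assert(sext_via_signbits(42) == 42);
  return 0;
}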
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
@@ -25503,38 +26622,90 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
+ // Promote these manually to avoid over promotion to v2i64. Type
+ // legalization will revisit the v2i32 operation for more cleanup.
+ if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
+ getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
+ // AVX512DQ provides instructions that produce a v2i64 result.
+ if (Subtarget.hasDQI())
+ return;
+
+ SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
+ Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, MVT::v2i32, Res,
+ DAG.getValueType(VT.getVectorElementType()));
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ Results.push_back(Res);
+ return;
+ }
+
+ if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+
+ // Try to create a 128 bit vector, but don't exceed a 32 bit element.
+ unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
+ MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth),
+ VT.getVectorNumElements());
+ SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
+
+ // Preserve what we know about the size of the original result. Except
+ // when the result is v2i32 since we can't widen the assert.
+ if (PromoteVT != MVT::v2i32)
+ Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, PromoteVT, Res,
+ DAG.getValueType(VT.getVectorElementType()));
+
+ // Truncate back to the original width.
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+
+ // Now widen to 128 bits.
+ unsigned NumConcats = 128 / VT.getSizeInBits();
+ MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
+ VT.getVectorNumElements() * NumConcats);
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
+ Results.push_back(Res);
+ return;
+ }
+
+
if (VT == MVT::v2i32) {
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
+ bool Widenv2i32 =
+ getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector;
if (Src.getValueType() == MVT::v2f64) {
- MVT ResVT = MVT::v4i32;
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
if (!IsSigned && !Subtarget.hasVLX()) {
- // Widen to 512-bits.
- ResVT = MVT::v8i32;
+ // If v2i32 is widened, we can defer to the generic legalizer.
+ if (Widenv2i32)
+ return;
+          // Custom widen by doubling to a legal vector width. Isel will
+ // further widen to v8f64.
Opc = ISD::FP_TO_UINT;
- Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
- DAG.getUNDEF(MVT::v8f64),
- Src, DAG.getIntPtrConstant(0, dl));
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64,
+ Src, DAG.getUNDEF(MVT::v2f64));
}
- SDValue Res = DAG.getNode(Opc, dl, ResVT, Src);
- bool WidenType = getTypeAction(*DAG.getContext(),
- MVT::v2i32) == TypeWidenVector;
- ResVT = WidenType ? MVT::v4i32 : MVT::v2i32;
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Res,
- DAG.getIntPtrConstant(0, dl));
+ SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
+ if (!Widenv2i32)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
+ DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
}
- if (SrcVT == MVT::v2f32) {
+ if (SrcVT == MVT::v2f32 &&
+ getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
: ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
}
@@ -25610,7 +26781,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_ROUND: {
- if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
+ if (!isTypeLegal(N->getOperand(0).getValueType()))
return;
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
@@ -25780,29 +26951,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
if (SrcVT != MVT::f64 ||
- (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
+ (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8) ||
+ getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector)
return;
unsigned NumElts = DstVT.getVectorNumElements();
EVT SVT = DstVT.getVectorElementType();
EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
- SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
- MVT::v2f64, N->getOperand(0));
- SDValue ToVecInt = DAG.getBitcast(WiderVT, Expanded);
-
- if (getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector) {
- // If we are legalizing vectors by widening, we already have the desired
- // legal vector type, just return it.
- Results.push_back(ToVecInt);
- return;
- }
-
- SmallVector<SDValue, 8> Elts;
- for (unsigned i = 0, e = NumElts; i != e; ++i)
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT,
- ToVecInt, DAG.getIntPtrConstant(i, dl)));
-
- Results.push_back(DAG.getBuildVector(DstVT, dl, Elts));
+ SDValue Res;
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0));
+ Res = DAG.getBitcast(WiderVT, Res);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
return;
}
case ISD::MGATHER: {
@@ -25814,9 +26975,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
- Gather->getValue(),
- DAG.getUNDEF(MVT::v2f32));
+ SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
+ Gather->getPassThru(),
+ DAG.getUNDEF(MVT::v2f32));
if (!Subtarget.hasVLX()) {
// We need to widen the mask, but the instruction will only use 2
// of its elements. So we can use undef.
@@ -25824,8 +26985,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getUNDEF(MVT::v2i1));
Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
}
- SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
- Index, Gather->getScale() };
+ SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale() };
SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl,
Gather->getMemoryVT(), Gather->getMemOperand());
@@ -25838,9 +26999,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Index = Gather->getIndex();
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
- Gather->getValue(),
- DAG.getUNDEF(MVT::v2i32));
+ SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
+ Gather->getPassThru(),
+ DAG.getUNDEF(MVT::v2i32));
// If the index is v2i64 we can use it directly.
if (Index.getValueType() == MVT::v2i64 &&
(Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
@@ -25851,8 +27012,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getUNDEF(MVT::v2i1));
Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
}
- SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
- Index, Gather->getScale() };
+ SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale() };
SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl,
Gather->getMemoryVT(), Gather->getMemOperand());
@@ -25864,28 +27025,56 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Chain);
return;
}
- EVT IndexVT = Index.getValueType();
- EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
- IndexVT.getScalarType(), 4);
- // Otherwise we need to custom widen everything to avoid promotion.
- Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
- DAG.getUNDEF(IndexVT));
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
- DAG.getConstant(0, dl, MVT::v2i1));
- SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
- Index, Gather->getScale() };
- SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
- Gather->getMemoryVT(), dl, Ops,
- Gather->getMemOperand());
- SDValue Chain = Res.getValue(1);
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- Results.push_back(Chain);
- return;
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
+ EVT IndexVT = Index.getValueType();
+ EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ IndexVT.getScalarType(), 4);
+ // Otherwise we need to custom widen everything to avoid promotion.
+ Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
+ DAG.getUNDEF(IndexVT));
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
+ DAG.getConstant(0, dl, MVT::v2i1));
+ SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale() };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
+ Gather->getMemoryVT(), dl, Ops,
+ Gather->getMemOperand());
+ SDValue Chain = Res.getValue(1);
+ if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
+ }
}
- break;
+ return;
+ }
+ case ISD::LOAD: {
+ // Use an f64/i64 load and a scalar_to_vector for v2f32/v2i32 loads. This
+    // avoids scalarizing in 32-bit mode. In 64-bit mode this avoids an int->fp
+ // cast since type legalization will try to use an i64 load.
+ MVT VT = N->getSimpleValueType(0);
+ assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT");
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+ if (!ISD::isNON_EXTLoad(N))
+ return;
+ auto *Ld = cast<LoadSDNode>(N);
+ MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
+ SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(),
+ Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
+ SDValue Chain = Res.getValue(1);
+ MVT WideVT = MVT::getVectorVT(LdVT, 2);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
+ MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() * 2);
+ Res = DAG.getBitcast(CastVT, Res);
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
}
}
}
@@ -25943,9 +27132,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
- case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
- case X86ISD::ADDUS: return "X86ISD::ADDUS";
- case X86ISD::SUBUS: return "X86ISD::SUBUS";
+ case X86ISD::BLENDV: return "X86ISD::BLENDV";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
@@ -25988,15 +27175,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::LOR: return "X86ISD::LOR";
case X86ISD::LXOR: return "X86ISD::LXOR";
case X86ISD::LAND: return "X86ISD::LAND";
- case X86ISD::LINC: return "X86ISD::LINC";
- case X86ISD::LDEC: return "X86ISD::LDEC";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
- case X86ISD::VZEXT: return "X86ISD::VZEXT";
- case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
+ case X86ISD::VMTRUNC: return "X86ISD::VMTRUNC";
+ case X86ISD::VMTRUNCS: return "X86ISD::VMTRUNCS";
+ case X86ISD::VMTRUNCUS: return "X86ISD::VMTRUNCUS";
case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
@@ -26005,6 +27191,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
+ case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
@@ -26029,16 +27216,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
- case X86ISD::SMUL8: return "X86ISD::SMUL8";
- case X86ISD::UMUL8: return "X86ISD::UMUL8";
- case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG";
- case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG";
- case X86ISD::INC: return "X86ISD::INC";
- case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::BEXTR: return "X86ISD::BEXTR";
+ case X86ISD::BZHI: return "X86ISD::BZHI";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::MOVMSK: return "X86ISD::MOVMSK";
case X86ISD::PTEST: return "X86ISD::PTEST";
@@ -26136,7 +27318,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::XTEST: return "X86ISD::XTEST";
case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
case X86ISD::EXPAND: return "X86ISD::EXPAND";
- case X86ISD::SELECT: return "X86ISD::SELECT";
case X86ISD::SELECTS: return "X86ISD::SELECTS";
case X86ISD::ADDSUB: return "X86ISD::ADDSUB";
case X86ISD::RCP14: return "X86ISD::RCP14";
@@ -26162,16 +27343,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
- case X86ISD::ADDS: return "X86ISD::ADDS";
- case X86ISD::SUBS: return "X86ISD::SUBS";
case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI";
case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI";
+ case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI";
+ case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI";
case X86ISD::CVTTP2SI_RND: return "X86ISD::CVTTP2SI_RND";
case X86ISD::CVTTP2UI_RND: return "X86ISD::CVTTP2UI_RND";
+ case X86ISD::CVTTS2SI: return "X86ISD::CVTTS2SI";
+ case X86ISD::CVTTS2UI: return "X86ISD::CVTTS2UI";
case X86ISD::CVTTS2SI_RND: return "X86ISD::CVTTS2SI_RND";
case X86ISD::CVTTS2UI_RND: return "X86ISD::CVTTS2UI_RND";
case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P";
@@ -26182,12 +27365,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND";
case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
+ case X86ISD::MCVTPS2PH: return "X86ISD::MCVTPS2PH";
case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
case X86ISD::CVTPH2PS_RND: return "X86ISD::CVTPH2PS_RND";
case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
+ case X86ISD::MCVTP2SI: return "X86ISD::MCVTP2SI";
+ case X86ISD::MCVTP2UI: return "X86ISD::MCVTP2UI";
case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND";
case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND";
+ case X86ISD::CVTS2SI: return "X86ISD::CVTS2SI";
+ case X86ISD::CVTS2UI: return "X86ISD::CVTS2UI";
case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
@@ -26321,6 +27509,10 @@ bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<32>(Imm);
}
+bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const {
+ return isInt<32>(Imm);
+}
+
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
@@ -26434,7 +27626,7 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
// If the subtarget is using retpolines, we need to not generate jump tables.
- if (Subtarget.useRetpoline())
+ if (Subtarget.useRetpolineIndirectBranches())
return false;
// Otherwise, fallback on the generic logic.
@@ -26633,8 +27825,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Memory Reference
assert(MI.hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 1> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
// Machine Information
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -26732,7 +27924,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
@@ -26757,7 +27949,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, 16)
.add(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// Zero-extend the offset
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
@@ -26785,7 +27977,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
.addReg(NextOffsetReg)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
@@ -26804,7 +27996,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, 8)
.add(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
@@ -26841,7 +28033,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.addDisp(Disp, 8)
.add(Segment)
.addReg(NextAddrReg)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
@@ -26981,19 +28173,17 @@ static bool isCMOVPseudo(MachineInstr &MI) {
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
- case X86::CMOV_V2F64:
- case X86::CMOV_V2I64:
- case X86::CMOV_V4F32:
- case X86::CMOV_V4F64:
- case X86::CMOV_V4I64:
- case X86::CMOV_V16F32:
- case X86::CMOV_V8F32:
- case X86::CMOV_V8F64:
- case X86::CMOV_V8I64:
- case X86::CMOV_V8I1:
- case X86::CMOV_V16I1:
- case X86::CMOV_V32I1:
- case X86::CMOV_V64I1:
+ case X86::CMOV_VR128:
+ case X86::CMOV_VR128X:
+ case X86::CMOV_VR256:
+ case X86::CMOV_VR256X:
+ case X86::CMOV_VR512:
+ case X86::CMOV_VK2:
+ case X86::CMOV_VK4:
+ case X86::CMOV_VK8:
+ case X86::CMOV_VK16:
+ case X86::CMOV_VK32:
+ case X86::CMOV_VK64:
return true;
default:
@@ -27815,8 +29005,8 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineInstrBuilder MIB;
// Memory Reference.
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
// Initialize a register with zero.
MVT PVT = getPointerTy(MF->getDataLayout());
@@ -27845,7 +29035,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MIB.add(MI.getOperand(MemOpndSlot + i));
}
MIB.addReg(SSPCopyReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
}
MachineBasicBlock *
@@ -27861,8 +29051,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineFunction::iterator I = ++MBB->getIterator();
// Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
unsigned DstReg;
unsigned MemOpndSlot = 0;
@@ -27956,7 +29146,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MIB.addReg(LabelReg);
else
MIB.addMBB(restoreMBB);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
emitSetJmpShadowStackFix(MI, thisMBB);
@@ -28017,8 +29207,8 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
MVT PVT = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
@@ -28100,12 +29290,16 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineInstrBuilder MIB =
BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
- MIB.addDisp(MI.getOperand(i), SPPOffset);
+ MIB.addDisp(MO, SPPOffset);
+    else if (MO.isReg()) // Don't add the whole operand; we don't want to
+                         // preserve kill flags.
+ MIB.addReg(MO.getReg());
else
- MIB.add(MI.getOperand(i));
+ MIB.add(MO);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
// Subtract the current SSP from the previous SSP.
unsigned SspSubReg = MRI.createVirtualRegister(PtrRC);
@@ -28189,8 +29383,8 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
@@ -28221,19 +29415,29 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
// Reload FP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.add(MI.getOperand(i));
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+    if (MO.isReg()) // Don't add the whole operand; we don't want to
+                    // preserve kill flags.
+ MIB.addReg(MO.getReg());
+ else
+ MIB.add(MO);
+ }
+ MIB.setMemRefs(MMOs);
// Reload IP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
- MIB.addDisp(MI.getOperand(i), LabelOffset);
+ MIB.addDisp(MO, LabelOffset);
+    else if (MO.isReg()) // Don't add the whole operand; we don't want to
+                         // preserve kill flags.
+ MIB.addReg(MO.getReg());
else
- MIB.add(MI.getOperand(i));
+ MIB.add(MO);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
// Reload SP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP);
@@ -28241,9 +29445,10 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
else
- MIB.add(MI.getOperand(i));
+      MIB.add(MI.getOperand(i)); // We can preserve the kill flags here; it's
+                                 // the last instruction of the expansion.
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
// Jump
BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
@@ -28562,26 +29767,23 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_FR32:
case X86::CMOV_FR64:
- case X86::CMOV_F128:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
- case X86::CMOV_V2F64:
- case X86::CMOV_V2I64:
- case X86::CMOV_V4F32:
- case X86::CMOV_V4F64:
- case X86::CMOV_V4I64:
- case X86::CMOV_V16F32:
- case X86::CMOV_V8F32:
- case X86::CMOV_V8F64:
- case X86::CMOV_V8I64:
- case X86::CMOV_V8I1:
- case X86::CMOV_V16I1:
- case X86::CMOV_V32I1:
- case X86::CMOV_V64I1:
+ case X86::CMOV_VR128:
+ case X86::CMOV_VR128X:
+ case X86::CMOV_VR256:
+ case X86::CMOV_VR256X:
+ case X86::CMOV_VR512:
+ case X86::CMOV_VK2:
+ case X86::CMOV_VK4:
+ case X86::CMOV_VK8:
+ case X86::CMOV_VK16:
+ case X86::CMOV_VK32:
+ case X86::CMOV_VK64:
return EmitLoweredSelect(MI, BB);
case X86::RDFLAGS32:
@@ -28890,11 +30092,12 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
EVT SrcVT = Src.getValueType();
APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
Op.getConstantOperandVal(1));
- DAG.computeKnownBits(Src, Known, DemandedElt, Depth + 1);
+ Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1);
Known = Known.zextOrTrunc(BitWidth);
Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
break;
}
+ case X86ISD::VSRAI:
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
@@ -28903,72 +30106,62 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
}
- DAG.computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned ShAmt = ShiftImm->getZExtValue();
if (Opc == X86ISD::VSHLI) {
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits are known zero.
Known.Zero.setLowBits(ShAmt);
- } else {
+ } else if (Opc == X86ISD::VSRLI) {
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits are known zero.
Known.Zero.setHighBits(ShAmt);
+ } else {
+ Known.Zero.ashrInPlace(ShAmt);
+ Known.One.ashrInPlace(ShAmt);
}
}
break;
}
case X86ISD::PACKUS: {
// PACKUS is just a truncation if the upper half is zero.
- // TODO: Add DemandedElts support.
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
+
+ Known.One = APInt::getAllOnesValue(BitWidth * 2);
+ Known.Zero = APInt::getAllOnesValue(BitWidth * 2);
+
KnownBits Known2;
- DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(1), Known2, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ if (!!DemandedLHS) {
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ if (!!DemandedRHS) {
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedRHS, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+
if (Known.countMinLeadingZeros() < BitWidth)
Known.resetAll();
Known = Known.trunc(BitWidth);
break;
}
- case X86ISD::VZEXT: {
- // TODO: Add DemandedElts support.
- SDValue N0 = Op.getOperand(0);
- unsigned NumElts = VT.getVectorNumElements();
-
- EVT SrcVT = N0.getValueType();
- unsigned InNumElts = SrcVT.getVectorNumElements();
- unsigned InBitWidth = SrcVT.getScalarSizeInBits();
- assert(InNumElts >= NumElts && "Illegal VZEXT input");
-
- Known = KnownBits(InBitWidth);
- APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts);
- DAG.computeKnownBits(N0, Known, DemandedSrcElts, Depth + 1);
- Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InBitWidth);
- break;
- }
case X86ISD::CMOV: {
- DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1);
+ Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
- KnownBits Known2;
- DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1);
+ KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
}
- case X86ISD::UDIVREM8_ZEXT_HREG:
- // TODO: Support more than just the zero extended bits?
- if (Op.getResNo() != 1)
- break;
- // The remainder is zero extended.
- Known.Zero.setBitsFrom(8);
- break;
}
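
The hunk above moves computeKnownBitsForTargetNode to the value-returning DAG.computeKnownBits API and adds X86ISD::VSRAI next to VSHLI/VSRLI. The sketch below shows, on a single hypothetical 16-bit lane with plain integers (two's complement assumed; not LLVM's APInt/KnownBits), how the known-zero/known-one masks move for the three shift kinds, mirroring the shift cases in that hunk:

    #include <cassert>
    #include <cstdint>

    // Minimal stand-in for a per-lane known-bits pair (assumption: 16-bit lanes).
    struct Known16 {
      uint16_t Zero; // bits known to be 0
      uint16_t One;  // bits known to be 1
    };

    // VSHLI model: known values move up, vacated low bits become known zero.
    static Known16 shiftLeft(Known16 K, unsigned Amt) {
      K.Zero = static_cast<uint16_t>(K.Zero << Amt);
      K.One = static_cast<uint16_t>(K.One << Amt);
      K.Zero |= static_cast<uint16_t>((1u << Amt) - 1); // low bits are zero
      return K;
    }

    // VSRLI model: known values move down, vacated high bits become known zero.
    static Known16 shiftRightLogical(Known16 K, unsigned Amt) {
      K.Zero = static_cast<uint16_t>(K.Zero >> Amt);
      K.One = static_cast<uint16_t>(K.One >> Amt);
      K.Zero |= static_cast<uint16_t>(~(0xFFFFu >> Amt)); // high bits are zero
      return K;
    }

    // VSRAI model: whatever is known about the sign bit is replicated downward
    // into the vacated high bits.
    static Known16 shiftRightArithmetic(Known16 K, unsigned Amt) {
      K.Zero = static_cast<uint16_t>(static_cast<int16_t>(K.Zero) >> Amt);
      K.One = static_cast<uint16_t>(static_cast<int16_t>(K.One) >> Amt);
      return K;
    }

    int main() {
      Known16 K{0x00FF, 0x0100}; // low byte known zero, bit 8 known one
      Known16 L = shiftLeft(K, 4);
      assert(L.One == 0x1000 && (L.Zero & 0x000F) == 0x000F);
      Known16 R = shiftRightLogical(K, 4);
      assert(R.Zero == static_cast<uint16_t>(0xF00F) && R.One == 0x0010);
      Known16 A = shiftRightArithmetic(Known16{0x0000, 0x8000}, 3);
      assert(A.One == 0xF000); // a known-one sign bit replicates downward
      return 0;
    }
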
// Handle target shuffles.
@@ -29013,8 +30206,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
for (unsigned i = 0; i != NumOps && !Known.isUnknown(); ++i) {
if (!DemandedOps[i])
continue;
- KnownBits Known2;
- DAG.computeKnownBits(Ops[i], Known2, DemandedOps[i], Depth + 1);
+ KnownBits Known2 =
+ DAG.computeKnownBits(Ops[i], DemandedOps[i], Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -29033,14 +30226,6 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
return VTBits;
- case X86ISD::VSEXT: {
- // TODO: Add DemandedElts support.
- SDValue Src = Op.getOperand(0);
- unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
- Tmp += VTBits - Src.getScalarValueSizeInBits();
- return Tmp;
- }
-
case X86ISD::VTRUNC: {
// TODO: Add DemandedElts support.
SDValue Src = Op.getOperand(0);
@@ -29054,10 +30239,16 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
case X86ISD::PACKSS: {
// PACKSS is just a truncation if the sign bits extend to the packed size.
- // TODO: Add DemandedElts support.
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(Op.getValueType(), DemandedElts, DemandedLHS,
+ DemandedRHS);
+
unsigned SrcBits = Op.getOperand(0).getScalarValueSizeInBits();
- unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
- unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ unsigned Tmp0 = SrcBits, Tmp1 = SrcBits;
+ if (!!DemandedLHS)
+ Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ if (!!DemandedRHS)
+ Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
unsigned Tmp = std::min(Tmp0, Tmp1);
if (Tmp > (SrcBits - VTBits))
return Tmp - (SrcBits - VTBits);
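
Both the PACKUS known-bits case and the PACKSS sign-bits case above now narrow the demanded result elements to the two pack inputs via getPackDemandedElts. The sketch below models that split for a single 128-bit lane only (e.g. a 16 x i8 result packed from two 8 x i16 inputs); the in-tree helper also walks 256/512-bit types lane by lane, which this illustration deliberately leaves out:

    #include <bitset>
    #include <cassert>

    // Single-128-bit-lane model of the split: result elements [0,8) come from
    // the LHS operand and [8,16) from the RHS operand.
    template <unsigned NumDstElts>
    static void splitPackDemanded(const std::bitset<NumDstElts> &Demanded,
                                  std::bitset<NumDstElts / 2> &DemandedLHS,
                                  std::bitset<NumDstElts / 2> &DemandedRHS) {
      const unsigned Half = NumDstElts / 2;
      DemandedLHS.reset();
      DemandedRHS.reset();
      for (unsigned i = 0; i != NumDstElts; ++i) {
        if (!Demanded[i])
          continue;
        if (i < Half)
          DemandedLHS.set(i);
        else
          DemandedRHS.set(i - Half);
      }
    }

    int main() {
      std::bitset<16> Demanded;
      Demanded.set(0); // result byte 0 -> LHS word 0
      Demanded.set(9); // result byte 9 -> RHS word 1
      std::bitset<8> LHS, RHS;
      splitPackDemanded<16>(Demanded, LHS, RHS);
      assert(LHS.count() == 1 && LHS.test(0));
      assert(RHS.count() == 1 && RHS.test(1));
      return 0;
    }
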
@@ -29099,12 +30290,6 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
return std::min(Tmp0, Tmp1);
}
- case X86ISD::SDIVREM8_SEXT_HREG:
- // TODO: Support more than just the sign extended bits?
- if (Op.getResNo() != 1)
- break;
- // The remainder is sign extended.
- return VTBits - 7;
}
// Fallback case.
@@ -29117,21 +30302,6 @@ SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
return N;
}
-/// Returns true (and the GlobalValue and the offset) if the node is a
-/// GlobalAddress + offset.
-bool X86TargetLowering::isGAPlusOffset(SDNode *N,
- const GlobalValue* &GA,
- int64_t &Offset) const {
- if (N->getOpcode() == X86ISD::Wrapper) {
- if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
- GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
- Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
- return true;
- }
- }
- return TargetLowering::isGAPlusOffset(N, GA, Offset);
-}
-
// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
@@ -29170,10 +30340,12 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
MVT::getIntegerVT(MaskEltSize);
SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
- if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits()) {
+ if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
- Shuffle = unsigned(X86ISD::VZEXT);
- } else
+
+ if (SrcVT.getVectorNumElements() == NumDstElts)
+ Shuffle = unsigned(ISD::ZERO_EXTEND);
+ else
Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
@@ -29430,9 +30602,10 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- // Attempt to match against either a unary or binary PACKSS/PACKUS shuffle.
- // TODO add support for 256/512-bit types.
- if ((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) {
+  // Attempt to match against either a unary or binary PACKSS/PACKUS shuffle.
+ if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) ||
+ ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) ||
+ ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
if (matchVectorShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
Subtarget)) {
DstVT = MaskVT;
@@ -29622,7 +30795,8 @@ static bool matchBinaryPermuteVectorShuffle(
/// instruction but should only be used to replace chains over a certain depth.
static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
ArrayRef<int> BaseMask, int Depth,
- bool HasVariableMask, SelectionDAG &DAG,
+ bool HasVariableMask,
+ bool AllowVariableMask, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
@@ -29835,7 +31009,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Depth threshold above which we can efficiently use variable mask shuffles.
int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
- bool AllowVariableMask = (Depth >= VariableShuffleDepth) || HasVariableMask;
+ AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
bool MaskContainsZeros =
any_of(Mask, [](int M) { return M == SM_SentinelZero; });
@@ -30169,7 +31343,8 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
static SDValue combineX86ShufflesRecursively(
ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
- bool HasVariableMask, SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+ bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
const unsigned MaxRecursionDepth = 8;
@@ -30195,30 +31370,36 @@ static SDValue combineX86ShufflesRecursively(
if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
return SDValue();
- assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
+ // TODO - Add support for more than 2 inputs.
+ if (2 < OpInputs.size())
+ return SDValue();
+
SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue());
// Add the inputs to the Ops list, avoiding duplicates.
SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
- int InputIdx0 = -1, InputIdx1 = -1;
- for (int i = 0, e = Ops.size(); i < e; ++i) {
- SDValue BC = peekThroughBitcasts(Ops[i]);
- if (Input0 && BC == peekThroughBitcasts(Input0))
- InputIdx0 = i;
- if (Input1 && BC == peekThroughBitcasts(Input1))
- InputIdx1 = i;
- }
+ auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
+ if (!Input)
+ return -1;
+ // Attempt to find an existing match.
+ SDValue InputBC = peekThroughBitcasts(Input);
+ for (int i = 0, e = Ops.size(); i < e; ++i)
+ if (InputBC == peekThroughBitcasts(Ops[i]))
+ return i;
+ // Match failed - should we replace an existing Op?
+ if (InsertionPoint >= 0) {
+ Ops[InsertionPoint] = Input;
+ return InsertionPoint;
+ }
+ // Add to the end of the Ops list.
+ Ops.push_back(Input);
+ return Ops.size() - 1;
+ };
- if (Input0 && InputIdx0 < 0) {
- InputIdx0 = SrcOpIndex;
- Ops[SrcOpIndex] = Input0;
- }
- if (Input1 && InputIdx1 < 0) {
- InputIdx1 = Ops.size();
- Ops.push_back(Input1);
- }
+ int InputIdx0 = AddOp(Input0, SrcOpIndex);
+ int InputIdx1 = AddOp(Input1, -1);
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
@@ -30324,18 +31505,23 @@ static SDValue combineX86ShufflesRecursively(
CombinedNodes.push_back(Op.getNode());
// See if we can recurse into each shuffle source op (if it's a target
- // shuffle). The source op should only be combined if it either has a
- // single use (i.e. current Op) or all its users have already been combined.
+  // shuffle). The source op should generally only be combined if it either has
+  // a single use (i.e. the current Op) or all its users have already been
+  // combined; if not, we can still combine but should prevent generating
+  // variable shuffles to avoid constant pool bloat.
// Don't recurse if we already have more source ops than we can combine in
// the remaining recursion depth.
if (Ops.size() < (MaxRecursionDepth - Depth)) {
- for (int i = 0, e = Ops.size(); i < e; ++i)
+ for (int i = 0, e = Ops.size(); i < e; ++i) {
+ bool AllowVar = false;
if (Ops[i].getNode()->hasOneUse() ||
SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
- if (SDValue Res = combineX86ShufflesRecursively(
- Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
- DAG, Subtarget))
- return Res;
+ AllowVar = AllowVariableMask;
+ if (SDValue Res = combineX86ShufflesRecursively(
+ Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
+ AllowVar, DAG, Subtarget))
+ return Res;
+ }
}
// Attempt to constant fold all of the constant source ops.
@@ -30365,8 +31551,8 @@ static SDValue combineX86ShufflesRecursively(
}
// Finally, try to combine into a single shuffle instruction.
- return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG,
- Subtarget);
+ return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask,
+ AllowVariableMask, DAG, Subtarget);
}
/// Get the PSHUF-style mask from PSHUF node.
@@ -30545,74 +31731,6 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
return V;
}
-/// Search for a combinable shuffle across a chain ending in pshuflw or
-/// pshufhw.
-///
-/// We walk up the chain, skipping shuffles of the other half and looking
-/// through shuffles which switch halves trying to find a shuffle of the same
-/// pair of dwords.
-static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
- SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
- assert(
- (N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) &&
- "Called with something other than an x86 128-bit half shuffle!");
- SDLoc DL(N);
- unsigned CombineOpcode = N.getOpcode();
-
- // Walk up a single-use chain looking for a combinable shuffle.
- SDValue V = N.getOperand(0);
- for (; V.hasOneUse(); V = V.getOperand(0)) {
- switch (V.getOpcode()) {
- default:
- return false; // Nothing combined!
-
- case ISD::BITCAST:
- // Skip bitcasts as we always know the type for the target specific
- // instructions.
- continue;
-
- case X86ISD::PSHUFLW:
- case X86ISD::PSHUFHW:
- if (V.getOpcode() == CombineOpcode)
- break;
-
- // Other-half shuffles are no-ops.
- continue;
- }
- // Break out of the loop if we break out of the switch.
- break;
- }
-
- if (!V.hasOneUse())
- // We fell out of the loop without finding a viable combining instruction.
- return false;
-
- // Combine away the bottom node as its shuffle will be accumulated into
- // a preceding shuffle.
- DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
-
- // Record the old value.
- SDValue Old = V;
-
- // Merge this node's mask and our incoming mask (adjusted to account for all
- // the pshufd instructions encountered).
- SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
- for (int &M : Mask)
- M = VMask[M];
- V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
- getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
-
- // Check that the shuffles didn't cancel each other out. If not, we need to
- // combine to the new one.
- if (Old != V)
- // Replace the combinable shuffle with the combined one, updating all users
- // so that we re-evaluate the chain here.
- DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
-
- return true;
-}
-
/// Try to combine x86 target specific shuffles.
static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -30667,7 +31785,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
DemandedMask[i] = i;
if (SDValue Res = combineX86ShufflesRecursively(
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
}
@@ -30679,40 +31797,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
Mask = getPSHUFShuffleMask(N);
assert(Mask.size() == 4);
break;
- case X86ISD::UNPCKL: {
- // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in
- // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE
- // moves upper half elements into the lower half part. For example:
- //
- // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1,
- // undef:v16i8
- // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2
- //
- // will be combined to:
- //
- // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1
-
- // This is only for 128-bit vectors. From SSE4.1 onward this combine may not
- // happen due to advanced instructions.
- if (!VT.is128BitVector())
- return SDValue();
-
- auto Op0 = N.getOperand(0);
- auto Op1 = N.getOperand(1);
- if (Op0.isUndef() && Op1.getOpcode() == ISD::VECTOR_SHUFFLE) {
- ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask();
-
- unsigned NumElts = VT.getVectorNumElements();
- SmallVector<int, 8> ExpectedMask(NumElts, -1);
- std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2,
- NumElts / 2);
-
- auto ShufOp = Op1.getOperand(0);
- if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask))
- return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp);
- }
- return SDValue();
- }
case X86ISD::MOVSD:
case X86ISD::MOVSS: {
SDValue N0 = N.getOperand(0);
@@ -30844,9 +31928,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
case X86ISD::PSHUFHW:
assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");
- if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
- return SDValue(); // We combined away this shuffle, so we're done.
-
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations. Note that it has to at least flip the
// dwords as otherwise it would have been removed as a no-op.
@@ -31286,13 +32367,404 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// a particular chain.
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
+
+ // Simplify source operands based on shuffle mask.
+ // TODO - merge this into combineX86ShufflesRecursively.
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, DCI))
+ return SDValue(N, 0);
+ }
+
+ // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
+ // operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
+ // FIXME: This can probably go away once we default to widening legalization.
+ if (Subtarget.hasSSE41() && VT == MVT::v4i32 &&
+ N->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N->getOperand(0).getOpcode() == ISD::BITCAST &&
+ N->getOperand(0).getOperand(0).getOpcode() == X86ISD::PMULUDQ) {
+ SDValue BC = N->getOperand(0);
+ SDValue MULUDQ = BC.getOperand(0);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> Mask = SVOp->getMask();
+ if (BC.hasOneUse() && MULUDQ.hasOneUse() &&
+ Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) {
+ SDValue Op0 = MULUDQ.getOperand(0);
+ SDValue Op1 = MULUDQ.getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
+ Op0.getOperand(0).getValueType() == MVT::v4i32) {
+ ShuffleVectorSDNode *SVOp0 =
+ cast<ShuffleVectorSDNode>(Op0.getOperand(0));
+ ArrayRef<int> Mask2 = SVOp0->getMask();
+ if (Mask2[0] == 0 && Mask2[1] == -1 &&
+ Mask2[2] == 1 && Mask2[3] == -1) {
+ Op0 = SVOp0->getOperand(0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
+ Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask);
+ return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
+ }
+ }
+ if (Op1.getOpcode() == ISD::BITCAST &&
+ Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
+ Op1.getOperand(0).getValueType() == MVT::v4i32) {
+ ShuffleVectorSDNode *SVOp1 =
+ cast<ShuffleVectorSDNode>(Op1.getOperand(0));
+ ArrayRef<int> Mask2 = SVOp1->getMask();
+ if (Mask2[0] == 0 && Mask2[1] == -1 &&
+ Mask2[2] == 1 && Mask2[3] == -1) {
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask);
+ Op1 = SVOp1->getOperand(0);
+ return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
+ }
+ }
+ }
}
return SDValue();
}
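
The SimplifyDemandedVectorEltsForTargetNode hook added below ends by asking each shuffle source only for the elements that demanded result lanes actually reference (the "Attempt to simplify inputs" loop). A standalone sketch of that per-source demanded-element computation, assuming at most 64 elements encoded in a uint64_t mask; the in-tree code uses APInt and the SM_Sentinel* constants instead:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Given a shuffle mask over NumSrcs concatenated sources (each with
    // NumElts elements) and a bitmask of demanded result elements, work out
    // which elements of each source are actually referenced. Negative mask
    // values model the undef/zero sentinels.
    static std::vector<uint64_t> demandedPerSource(const std::vector<int> &Mask,
                                                   uint64_t DemandedElts,
                                                   int NumSrcs, int NumElts) {
      std::vector<uint64_t> SrcElts(NumSrcs, 0);
      for (int i = 0; i != NumElts; ++i) {
        if (!(DemandedElts & (1ull << i)))
          continue; // this result element is not demanded
        int M = Mask[i];
        if (M < 0)
          continue; // undef/zero sentinel references no source element
        SrcElts[M / NumElts] |= 1ull << (M % NumElts);
      }
      return SrcElts;
    }

    int main() {
      // 4-element shuffle <2, -1, 4, 7> over two sources; only lanes 0 and 2
      // of the result are demanded.
      std::vector<int> Mask = {2, -1, 4, 7};
      std::vector<uint64_t> Src =
          demandedPerSource(Mask, /*DemandedElts=*/0b0101, 2, 4);
      assert(Src[0] == 0b0100); // element 2 of source 0
      assert(Src[1] == 0b0001); // element 0 of source 1 (mask value 4 -> 4 % 4)
      return 0;
    }
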
+bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
+ TargetLoweringOpt &TLO, unsigned Depth) const {
+ int NumElts = DemandedElts.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+ EVT VT = Op.getValueType();
+
+ // Handle special case opcodes.
+ switch (Opc) {
+ case X86ISD::VSHL:
+ case X86ISD::VSRL:
+ case X86ISD::VSRA: {
+ // We only need the bottom 64-bits of the (128-bit) shift amount.
+ SDValue Amt = Op.getOperand(1);
+ MVT AmtVT = Amt.getSimpleValueType();
+ assert(AmtVT.is128BitVector() && "Unexpected value type");
+ APInt AmtUndef, AmtZero;
+ unsigned NumAmtElts = AmtVT.getVectorNumElements();
+ APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
+ if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
+ Depth + 1))
+ return true;
+ LLVM_FALLTHROUGH;
+ }
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI: {
+ SDValue Src = Op.getOperand(0);
+ APInt SrcUndef;
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO,
+ Depth + 1))
+ return true;
+ // TODO convert SrcUndef to KnownUndef.
+ break;
+ }
+ case X86ISD::CVTSI2P:
+ case X86ISD::CVTUI2P: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ APInt SrcUndef, SrcZero;
+ APInt SrcElts = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS: {
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::VBROADCAST: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ if (!SrcVT.isVector())
+ return false;
+ // Don't bother broadcasting if we just need the 0'th element.
+ if (DemandedElts == 1) {
+      if (Src.getValueType() != VT)
+ Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG,
+ SDLoc(Op));
+ return TLO.CombineTo(Op, Src);
+ }
+ APInt SrcUndef, SrcZero;
+ APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0);
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::PSHUFB: {
+ // TODO - simplify other variable shuffle masks.
+ SDValue Mask = Op.getOperand(1);
+ APInt MaskUndef, MaskZero;
+ if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ }
+
+ // Simplify target shuffles.
+ if (!isTargetShuffle(Opc) || !VT.isSimple())
+ return false;
+
+ // Get target shuffle mask.
+ bool IsUnary;
+ SmallVector<int, 64> OpMask;
+ SmallVector<SDValue, 2> OpInputs;
+ if (!getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, OpInputs,
+ OpMask, IsUnary))
+ return false;
+
+ // Shuffle inputs must be the same type as the result.
+ if (llvm::any_of(OpInputs,
+ [VT](SDValue V) { return VT != V.getValueType(); }))
+ return false;
+
+ // Clear known elts that might have been set above.
+ KnownZero.clearAllBits();
+ KnownUndef.clearAllBits();
+
+ // Check if shuffle mask can be simplified to undef/zero/identity.
+ int NumSrcs = OpInputs.size();
+ for (int i = 0; i != NumElts; ++i) {
+ int &M = OpMask[i];
+ if (!DemandedElts[i])
+ M = SM_SentinelUndef;
+ else if (0 <= M && OpInputs[M / NumElts].isUndef())
+ M = SM_SentinelUndef;
+ }
+
+ if (isUndefInRange(OpMask, 0, NumElts)) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+ if (isUndefOrZeroInRange(OpMask, 0, NumElts)) {
+ KnownZero.setAllBits();
+ return TLO.CombineTo(
+ Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));
+ }
+ for (int Src = 0; Src != NumSrcs; ++Src)
+ if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts))
+ return TLO.CombineTo(Op, OpInputs[Src]);
+
+ // Attempt to simplify inputs.
+ for (int Src = 0; Src != NumSrcs; ++Src) {
+ int Lo = Src * NumElts;
+ APInt SrcElts = APInt::getNullValue(NumElts);
+ for (int i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ int M = OpMask[i] - Lo;
+ if (0 <= M && M < NumElts)
+ SrcElts.setBit(M);
+ }
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+ }
+
+ // Extract known zero/undef elements.
+ // TODO - Propagate input undef/zero elts.
+ for (int i = 0; i != NumElts; ++i) {
+ if (OpMask[i] == SM_SentinelUndef)
+ KnownUndef.setBit(i);
+ if (OpMask[i] == SM_SentinelZero)
+ KnownZero.setBit(i);
+ }
+
+ return false;
+}
+
+bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
+ SDValue Op, const APInt &OriginalDemandedBits,
+ const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = OriginalDemandedBits.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+  switch (Opc) {
+ case X86ISD::PMULDQ:
+ case X86ISD::PMULUDQ: {
+ // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
+ KnownBits KnownOp;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // FIXME: Can we bound this better?
+ APInt DemandedMask = APInt::getLowBitsSet(64, 32);
+ if (SimplifyDemandedBits(LHS, DemandedMask, KnownOp, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(RHS, DemandedMask, KnownOp, TLO, Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::VSHLI: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op1)) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (Op0.getOpcode() == X86ISD::VSRLI &&
+ OriginalDemandedBits.countTrailingZeros() >= ShAmt) {
+ if (auto *Shift2Imm = dyn_cast<ConstantSDNode>(Op0.getOperand(1))) {
+ if (Shift2Imm->getAPIntValue().ult(BitWidth)) {
+ int Diff = ShAmt - Shift2Imm->getZExtValue();
+ if (Diff == 0)
+ return TLO.CombineTo(Op, Op0.getOperand(0));
+
+ unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI;
+ SDValue NewShift = TLO.DAG.getNode(
+ NewOpc, SDLoc(Op), VT, Op0.getOperand(0),
+ TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
+ return TLO.CombineTo(Op, NewShift);
+ }
+ }
+ }
+
+ if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
+ TLO, Depth + 1))
+ return true;
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero <<= ShAmt;
+ Known.One <<= ShAmt;
+
+ // Low bits known zero.
+ Known.Zero.setLowBits(ShAmt);
+ }
+ break;
+ }
+ case X86ISD::VSRLI: {
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits << ShAmt;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
+ OriginalDemandedElts, Known, TLO, Depth + 1))
+ return true;
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+
+ // High bits known zero.
+ Known.Zero.setHighBits(ShAmt);
+ }
+ break;
+ }
+ case X86ISD::VSRAI: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op1)) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits << ShAmt;
+
+ // If we just want the sign bit then we don't need to shift it.
+ if (OriginalDemandedBits.isSignMask())
+ return TLO.CombineTo(Op, Op0);
+
+ // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
+ if (Op0.getOpcode() == X86ISD::VSHLI && Op1 == Op0.getOperand(1)) {
+ SDValue Op00 = Op0.getOperand(0);
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op00, OriginalDemandedElts);
+ if (ShAmt < NumSignBits)
+ return TLO.CombineTo(Op, Op00);
+ }
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ if (OriginalDemandedBits.countLeadingZeros() < ShAmt)
+ DemandedMask.setSignBit();
+
+ if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
+ TLO, Depth + 1))
+ return true;
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (Known.Zero[BitWidth - ShAmt - 1] ||
+ OriginalDemandedBits.countLeadingZeros() >= ShAmt)
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1));
+
+ // High bits are known one.
+ if (Known.One[BitWidth - ShAmt - 1])
+ Known.One.setHighBits(ShAmt);
+ }
+ break;
+ }
+ case X86ISD::MOVMSK: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ unsigned SrcBits = SrcVT.getScalarSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+
+ // If we don't need the sign bits at all just return zero.
+ if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+ // Only demand the vector elements of the sign bits we need.
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
+ return true;
+
+ Known.Zero = KnownZero.zextOrSelf(BitWidth);
+ Known.Zero.setHighBits(BitWidth - NumElts);
+
+ // MOVMSK only uses the MSB from each vector element.
+ KnownBits KnownSrc;
+ if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), DemandedElts,
+ KnownSrc, TLO, Depth + 1))
+ return true;
+
+ if (KnownSrc.One[SrcBits - 1])
+ Known.One.setLowBits(NumElts);
+ else if (KnownSrc.Zero[SrcBits - 1])
+ Known.Zero.setLowBits(NumElts);
+ return false;
+ }
+ }
+
+ return TargetLowering::SimplifyDemandedBitsForTargetNode(
+ Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
+}
+
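
The X86ISD::MOVMSK case in the new SimplifyDemandedBitsForTargetNode above relies on MOVMSK packing one sign bit per source element into the low bits of the scalar result, with every bit at or above NumElts known zero. A scalar model of that behaviour (illustrative only, shaped like a 16 x i8 PMOVMSKB):

    #include <cassert>
    #include <cstdint>

    // Result bit i is the sign bit of element i; all higher result bits are
    // zero. This is exactly what the demanded-bits handling above exploits.
    static uint32_t movemask(const int8_t *Elts, unsigned NumElts) {
      uint32_t Mask = 0;
      for (unsigned i = 0; i != NumElts; ++i)
        if (Elts[i] < 0) // element sign bit set
          Mask |= 1u << i;
      return Mask;
    }

    int main() {
      int8_t V[16] = {};
      V[0] = -1;   // sign bit set   -> result bit 0
      V[5] = 127;  // sign bit clear -> result bit 5 stays 0
      V[15] = -128;
      uint32_t M = movemask(V, 16);
      assert(M == ((1u << 0) | (1u << 15)));
      assert((M >> 16) == 0); // bits above NumElts are always known zero
      return 0;
    }
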
/// Check if a vector extract from a target-specific shuffle of a load can be
/// folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
@@ -31344,9 +32816,13 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (Idx == SM_SentinelUndef)
return DAG.getUNDEF(EltVT);
+ // Bail if any mask element is SM_SentinelZero - getVectorShuffle below
+ // won't handle it.
+ if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; }))
+ return SDValue();
+
assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
- SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0]
- : ShuffleOps[1];
+ SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] : ShuffleOps[1];
// If inputs to shuffle are the same for both ops, then allow 2 uses
unsigned AllowedUses =
@@ -31407,9 +32883,18 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
if (!VT.isScalarInteger() || !VecVT.isSimple())
return SDValue();
+  // If the input is a truncate from v16i8 or v32i8, go ahead and use a
+ // movmskb even with avx512. This will be better than truncating to vXi1 and
+ // using a kmov. This can especially help KNL if the input is a v16i8/v32i8
+ // vpcmpeqb/vpcmpgtb.
+ bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ (N0.getOperand(0).getValueType() == MVT::v16i8 ||
+ N0.getOperand(0).getValueType() == MVT::v32i8 ||
+ N0.getOperand(0).getValueType() == MVT::v64i8);
+
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
- if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
+ if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !IsTruncated))
return SDValue();
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
@@ -31423,23 +32908,19 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
MVT SExtVT;
- MVT FPCastVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
switch (VecVT.getSimpleVT().SimpleTy) {
default:
return SDValue();
case MVT::v2i1:
SExtVT = MVT::v2i64;
- FPCastVT = MVT::v2f64;
break;
case MVT::v4i1:
SExtVT = MVT::v4i32;
- FPCastVT = MVT::v4f32;
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
// sign-extend to a 256-bit operation to avoid truncation.
if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
N0->getOperand(0).getValueType().is256BitVector()) {
SExtVT = MVT::v4i64;
- FPCastVT = MVT::v4f64;
}
break;
case MVT::v8i1:
@@ -31453,7 +32934,6 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
(N0->getOperand(0).getValueType().is256BitVector() ||
N0->getOperand(0).getValueType().is512BitVector())) {
SExtVT = MVT::v8i32;
- FPCastVT = MVT::v8f32;
}
break;
case MVT::v16i1:
@@ -31466,26 +32946,37 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
case MVT::v32i1:
SExtVT = MVT::v32i8;
break;
+ case MVT::v64i1:
+    // If we have AVX512F but not AVX512BW, and the input is a truncate from
+    // v64i8 (checked earlier), split the input and emit two pmovmskbs.
+ if (Subtarget.hasAVX512() && !Subtarget.hasBWI()) {
+ SExtVT = MVT::v64i8;
+ break;
+ }
+ return SDValue();
};
SDLoc DL(BitCast);
- SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT);
+ SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, N0);
- if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
+ if (SExtVT == MVT::v64i8) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
+ Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo);
+ Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+ DAG.getConstant(32, DL, MVT::i8));
+ V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+ } else if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
V = getPMOVMSKB(DL, V, DAG, Subtarget);
- return DAG.getZExtOrTrunc(V, DL, VT);
+ } else {
+ if (SExtVT == MVT::v8i16)
+ V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
+ DAG.getUNDEF(MVT::v8i16));
+ V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
}
-
- if (SExtVT == MVT::v8i16) {
- assert(16 == DAG.ComputeNumSignBits(V) && "Expected all/none bit vector");
- V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
- DAG.getUNDEF(MVT::v8i16));
- } else
- assert(SExtVT.getScalarType() != MVT::i16 &&
- "Vectors of i16 must be packed");
- if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
- V = DAG.getBitcast(FPCastVT, V);
- V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
return DAG.getZExtOrTrunc(V, DL, VT);
}
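
For the new MVT::v64i1 case above (AVX512F without AVX512BW), the bitcast is lowered as two 32-lane MOVMSKs whose results are recombined with a 32-bit shift and an OR. The recombination step in scalar form, as a small self-contained sketch:

    #include <cassert>
    #include <cstdint>

    // Zero-extend the low 32-bit mask, shift the high mask up by 32, and OR
    // the two halves into one 64-bit mask.
    static uint64_t combineMasks(uint32_t LoMask, uint32_t HiMask) {
      return static_cast<uint64_t>(LoMask) |
             (static_cast<uint64_t>(HiMask) << 32);
    }

    int main() {
      assert(combineMasks(0x80000001u, 0x1u) == 0x0000000180000001ull);
      assert(combineMasks(0u, 0x80000000u) == 0x8000000000000000ull);
      return 0;
    }
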
@@ -31806,65 +33297,6 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// Match a binop + shuffle pyramid that represents a horizontal reduction over
-// the elements of a vector.
-// Returns the vector that is being reduced on, or SDValue() if a reduction
-// was not matched.
-static SDValue matchBinOpReduction(SDNode *Extract, unsigned &BinOp,
- ArrayRef<ISD::NodeType> CandidateBinOps) {
- // The pattern must end in an extract from index 0.
- if ((Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ||
- !isNullConstant(Extract->getOperand(1)))
- return SDValue();
-
- SDValue Op = Extract->getOperand(0);
- unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
-
- // Match against one of the candidate binary ops.
- if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
- return Op.getOpcode() == unsigned(BinOp);
- }))
- return SDValue();
-
- // At each stage, we're looking for something that looks like:
- // %s = shufflevector <8 x i32> %op, <8 x i32> undef,
- // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
- // i32 undef, i32 undef, i32 undef, i32 undef>
- // %a = binop <8 x i32> %op, %s
- // Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
- // we expect something like:
- // <4,5,6,7,u,u,u,u>
- // <2,3,u,u,u,u,u,u>
- // <1,u,u,u,u,u,u,u>
- unsigned CandidateBinOp = Op.getOpcode();
- for (unsigned i = 0; i < Stages; ++i) {
- if (Op.getOpcode() != CandidateBinOp)
- return SDValue();
-
- ShuffleVectorSDNode *Shuffle =
- dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0).getNode());
- if (Shuffle) {
- Op = Op.getOperand(1);
- } else {
- Shuffle = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(1).getNode());
- Op = Op.getOperand(0);
- }
-
- // The first operand of the shuffle should be the same as the other operand
- // of the binop.
- if (!Shuffle || Shuffle->getOperand(0) != Op)
- return SDValue();
-
- // Verify the shuffle has the expected (at this stage of the pyramid) mask.
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
- if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
- return SDValue();
- }
-
- BinOp = CandidateBinOp;
- return Op;
-}
-
// Given a select, detect the following pattern:
// 1: %2 = zext <N x i8> %0 to <N x i32>
// 2: %3 = zext <N x i8> %1 to <N x i32>
@@ -31979,8 +33411,8 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
return SDValue();
// Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
- unsigned BinOp;
- SDValue Src = matchBinOpReduction(
+ ISD::NodeType BinOp;
+ SDValue Src = DAG.matchBinOpReduction(
Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN});
if (!Src)
return SDValue();
@@ -32027,7 +33459,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
// ready for the PHMINPOS.
if (ExtractVT == MVT::i8) {
SDValue Upper = DAG.getVectorShuffle(
- SrcVT, DL, MinPos, getZeroVector(MVT::v16i8, Subtarget, DAG, DL),
+ SrcVT, DL, MinPos, DAG.getConstant(0, DL, MVT::v16i8),
{1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
}
@@ -32059,8 +33491,8 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
return SDValue();
// Check for OR(any_of) and AND(all_of) horizontal reduction patterns.
- unsigned BinOp = 0;
- SDValue Match = matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
+ ISD::NodeType BinOp;
+ SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
if (!Match)
return SDValue();
@@ -32142,8 +33574,8 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
return SDValue();
// Match shuffle + add pyramid.
- unsigned BinOp = 0;
- SDValue Root = matchBinOpReduction(Extract, BinOp, {ISD::ADD});
+ ISD::NodeType BinOp;
+ SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});
// The operand is expected to be zero extended from i8
// (verified in detectZextAbsDiff).
@@ -32238,6 +33670,15 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
scaleShuffleMask<int>(Scale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
} else if ((Mask.size() % NumSrcElts) == 0) {
+      // Simplify Mask based on the demanded element.
+ int ExtractIdx = (int)N->getConstantOperandVal(1);
+ int Scale = Mask.size() / NumSrcElts;
+ int Lo = Scale * ExtractIdx;
+ int Hi = Scale * (ExtractIdx + 1);
+ for (int i = 0, e = (int)Mask.size(); i != e; ++i)
+ if (i < Lo || Hi <= i)
+ Mask[i] = SM_SentinelUndef;
+
SmallVector<int, 16> WidenedMask;
while (Mask.size() > NumSrcElts &&
canWidenShuffleElements(Mask, WidenedMask))
@@ -32532,11 +33973,14 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
/// If this is a *dynamic* select (non-constant condition) and we can match
/// this node with one of the variable blend instructions, restructure the
/// condition so that blends can use the high (sign) bit of each element.
-static SDValue combineVSelectToShrunkBlend(SDNode *N, SelectionDAG &DAG,
+/// This function will also call SimplifyDemandedBits on already-created
+/// BLENDV nodes to perform additional simplifications.
+static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
- if (N->getOpcode() != ISD::VSELECT ||
+ if ((N->getOpcode() != ISD::VSELECT &&
+ N->getOpcode() != X86ISD::BLENDV) ||
ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
@@ -32578,7 +34022,9 @@ static SDValue combineVSelectToShrunkBlend(SDNode *N, SelectionDAG &DAG,
// TODO: Add other opcodes eventually lowered into BLEND.
for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
UI != UE; ++UI)
- if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0)
+ if ((UI->getOpcode() != ISD::VSELECT &&
+ UI->getOpcode() != X86ISD::BLENDV) ||
+ UI.getOperandNo() != 0)
return SDValue();
APInt DemandedMask(APInt::getSignMask(BitWidth));
@@ -32594,9 +34040,13 @@ static SDValue combineVSelectToShrunkBlend(SDNode *N, SelectionDAG &DAG,
// optimizations as we messed with the actual expectation for the vector
// boolean values.
for (SDNode *U : Cond->uses()) {
- SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U), U->getValueType(0),
+ if (U->getOpcode() == X86ISD::BLENDV)
+ continue;
+
+ SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),
Cond, U->getOperand(1), U->getOperand(2));
DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
+ DCI.AddToWorklist(U);
}
DCI.CommitTargetLoweringOpt(TLO);
return SDValue(N, 0);
@@ -32608,9 +34058,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
- // Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
+
+ // Try simplification again because we use this function to optimize
+ // BLENDV nodes that are not handled by the generic combiner.
+ if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS))
+ return V;
+
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -32618,18 +34073,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Convert vselects with constant condition into shuffles.
if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
DCI.isBeforeLegalizeOps()) {
- SmallVector<int, 64> Mask(VT.getVectorNumElements(), -1);
- for (int i = 0, Size = Mask.size(); i != Size; ++i) {
- SDValue CondElt = Cond->getOperand(i);
- Mask[i] = i;
- // Arbitrarily choose from the 2nd operand if the select condition element
- // is undef.
- // TODO: Can we do better by matching patterns such as even/odd?
- if (CondElt.isUndef() || isNullConstant(CondElt))
- Mask[i] += Size;
- }
-
- return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
+ SmallVector<int, 64> Mask;
+ if (createShuffleMaskFromVSELECT(Mask, Cond))
+ return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
}
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
@@ -32814,7 +34260,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Since SKX these selects have a proper lowering.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1 &&
- VT.getVectorNumElements() > 4 &&
+ (ExperimentalVectorWideningLegalization ||
+ VT.getVectorNumElements() > 4) &&
(VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16)) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
@@ -32855,15 +34302,13 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
- // Early exit check
- if (!TLI.isTypeLegal(VT))
- return SDValue();
-
// Match VSELECTs into subs with unsigned saturation.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
- // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
- ((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
- (Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+ // psubus is available in SSE2 for i8 and i16 vectors.
+ Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 &&
+ isPowerOf2_32(VT.getVectorNumElements()) &&
+ (VT.getVectorElementType() == MVT::i8 ||
+ VT.getVectorElementType() == MVT::i16)) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Check if one of the arms of the VSELECT is a zero vector. If it's on the
@@ -32877,37 +34322,31 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
if (Other.getNode() && Other->getNumOperands() == 2 &&
- DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
+ Other->getOperand(0) == Cond.getOperand(0)) {
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
SDValue CondRHS = Cond->getOperand(1);
- auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
- };
-
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> subus x, y
// x > y ? x-y : 0 --> subus x, y
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
- Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ Other->getOpcode() == ISD::SUB && OpRHS == CondRHS)
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
- if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
+ if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
if (isa<BuildVectorSDNode>(CondRHS)) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > C-1 ? x+-C : 0 --> subus x, C
- auto MatchSUBUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ // TODO: Handle build_vectors with undef elements.
+ auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1);
};
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchSUBUS)) {
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT)) {
OpRHS = DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), OpRHS);
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
// Another special case: If C was a sign bit, the sub has been
@@ -32915,24 +34354,82 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// FIXME: Would it be better to use computeKnownBits to determine
// whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> subus x, C
- if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode())
+ if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
if (CC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
OpRHSConst->getAPIntValue().isSignMask()) {
- OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
// Note that we have to rebuild the RHS constant here to ensure we
// don't rely on particular values of undef lanes.
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
+ }
}
+ }
+ }
+ }
+
+ // Match VSELECTs into add with unsigned saturation.
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ // paddus is available in SSE2 for i8 and i16 vectors.
+ Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 &&
+ isPowerOf2_32(VT.getVectorNumElements()) &&
+ (VT.getVectorElementType() == MVT::i8 ||
+ VT.getVectorElementType() == MVT::i16)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ SDValue CondLHS = Cond->getOperand(0);
+ SDValue CondRHS = Cond->getOperand(1);
+
+ // Check if one of the arms of the VSELECT is vector with all bits set.
+ // If it's on the left side invert the predicate to simplify logic below.
+ SDValue Other;
+ if (ISD::isBuildVectorAllOnes(LHS.getNode())) {
+ Other = RHS;
+ CC = ISD::getSetCCInverse(CC, true);
+ } else if (ISD::isBuildVectorAllOnes(RHS.getNode())) {
+ Other = LHS;
+ }
+
+ if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ // Canonicalize condition operands.
+ if (CC == ISD::SETUGE) {
+ std::swap(CondLHS, CondRHS);
+ CC = ISD::SETULE;
+ }
+
+ // We can test against either of the addition operands.
+ // x <= x+y ? x+y : ~0 --> addus x, y
+ // x+y >= x ? x+y : ~0 --> addus x, y
+ if (CC == ISD::SETULE && Other == CondRHS &&
+ (OpLHS == CondLHS || OpRHS == CondLHS))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+
+ if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
+ CondLHS == OpLHS) {
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x > ~C ? x+C : ~0 --> addus x, C
+ auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return Cond->getAPIntValue() == ~Op->getAPIntValue();
+ };
+ if (CC == ISD::SETULE &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+ }
}
}
+ // Early exit check
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
return V;
- if (SDValue V = combineVSelectToShrunkBlend(N, DAG, DCI, Subtarget))
+ if (SDValue V = combineVSelectToBLENDV(N, DAG, DCI, Subtarget))
return V;
// Custom action for SELECT MMX
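
For reference, a minimal scalar sketch of the saturation idioms that the combines above now emit as ISD::USUBSAT / ISD::UADDSAT; the helper names and the i8 element type are illustrative assumptions, not code from the patch.

#include <cstdint>

// x >= y ? x - y : 0  -->  usubsat(x, y)
static uint8_t subus(uint8_t x, uint8_t y) {
  return x >= y ? static_cast<uint8_t>(x - y) : 0;
}

// x <= x + y ? x + y : ~0  -->  uaddsat(x, y); the 8-bit sum wraps exactly
// when it would exceed 0xFF, so the compare detects the overflow.
static uint8_t addus(uint8_t x, uint8_t y) {
  uint8_t sum = static_cast<uint8_t>(x + y);
  return x <= sum ? sum : 0xFF;
}
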
@@ -33014,16 +34511,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
/*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
/*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
AN->getMemOperand());
- // If the comparision uses the CF flag we can't use INC/DEC instructions.
- bool NeedCF = false;
- switch (CC) {
- default: break;
- case X86::COND_A: case X86::COND_AE:
- case X86::COND_B: case X86::COND_BE:
- NeedCF = true;
- break;
- }
- auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget, !NeedCF);
+ auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
@@ -33453,10 +34941,13 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
SDValue Add = TrueOp;
SDValue Const = FalseOp;
// Canonicalize the condition code for easier matching and output.
- if (CC == X86::COND_E) {
+ if (CC == X86::COND_E)
std::swap(Add, Const);
- CC = X86::COND_NE;
- }
+
+ // We might have replaced the constant in the cmov with the LHS of the
+ // compare. If so change it to the RHS of the compare.
+ if (Const == Cond.getOperand(0))
+ Const = Cond.getOperand(1);
// Ok, now make sure that Add is (add (cttz X), C2) and Const is a constant.
if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD &&
@@ -33468,7 +34959,8 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// This should constant fold.
SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));
SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),
- DAG.getConstant(CC, DL, MVT::i8), Cond);
+ DAG.getConstant(X86::COND_NE, DL, MVT::i8),
+ Cond);
return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1));
}
}
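
As a hedged scalar picture of the cmov-of-cttz rewrite above (the before/after functions are assumed illustrations): hoisting the constant add out of the cmov lets the conditional move consume the tzcnt result directly.

#include <cstdint>

static unsigned cmovCttzBefore(unsigned X, unsigned C, unsigned C2) {
  return X == 0 ? C : static_cast<unsigned>(__builtin_ctz(X)) + C2;
}

static unsigned cmovCttzAfter(unsigned X, unsigned C, unsigned C2) {
  // The cmov now selects between cttz(X) and the constant-folded C - C2 ...
  unsigned T = X == 0 ? C - C2 : static_cast<unsigned>(__builtin_ctz(X));
  return T + C2; // ... and a single add is applied outside the cmov.
}
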
@@ -33490,40 +34982,8 @@ static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) {
for (unsigned i = 0; i < 2; i++) {
SDValue Opd = N->getOperand(i);
- // DAG.ComputeNumSignBits return 1 for ISD::ANY_EXTEND, so we need to
- // compute signbits for it separately.
- if (Opd.getOpcode() == ISD::ANY_EXTEND) {
- // For anyextend, it is safe to assume an appropriate number of leading
- // sign/zero bits.
- if (Opd.getOperand(0).getValueType().getVectorElementType() == MVT::i8)
- SignBits[i] = 25;
- else if (Opd.getOperand(0).getValueType().getVectorElementType() ==
- MVT::i16)
- SignBits[i] = 17;
- else
- return false;
- IsPositive[i] = true;
- } else if (Opd.getOpcode() == ISD::BUILD_VECTOR) {
- // All the operands of BUILD_VECTOR need to be int constant.
- // Find the smallest value range which all the operands belong to.
- SignBits[i] = 32;
- IsPositive[i] = true;
- for (const SDValue &SubOp : Opd.getNode()->op_values()) {
- if (SubOp.isUndef())
- continue;
- auto *CN = dyn_cast<ConstantSDNode>(SubOp);
- if (!CN)
- return false;
- APInt IntVal = CN->getAPIntValue();
- if (IntVal.isNegative())
- IsPositive[i] = false;
- SignBits[i] = std::min(SignBits[i], IntVal.getNumSignBits());
- }
- } else {
- SignBits[i] = DAG.ComputeNumSignBits(Opd);
- if (Opd.getOpcode() == ISD::ZERO_EXTEND)
- IsPositive[i] = true;
- }
+ SignBits[i] = DAG.ComputeNumSignBits(Opd);
+ IsPositive[i] = DAG.SignBitIsZero(Opd);
}
bool AllPositive = IsPositive[0] && IsPositive[1];
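
An assumed scalar model of what ComputeNumSignBits reports for one i32 lane: the number of leading bits equal to the sign bit. Any value that fits in i16 has at least 17 of them, which is the property the multiply-width reduction relies on.

#include <cstdint>

static unsigned numSignBits32(int32_t v) {
  uint32_t u = static_cast<uint32_t>(v);
  uint32_t sign = (u >> 31) & 1;
  unsigned n = 0;
  for (int bit = 31; bit >= 0 && ((u >> bit) & 1) == sign; --bit)
    ++n; // count leading copies of the sign bit, including the sign bit itself
  return n; // 32 for 0 and -1, at least 17 for any value in [-32768, 32767]
}
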
@@ -33608,90 +35068,90 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
- if (NumElts >= OpsVT.getVectorNumElements()) {
+ if (ExperimentalVectorWideningLegalization ||
+ NumElts >= OpsVT.getVectorNumElements()) {
// Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
// lower part is needed.
SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
- if (Mode == MULU8 || Mode == MULS8) {
+ if (Mode == MULU8 || Mode == MULS8)
return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
DL, VT, MulLo);
- } else {
- MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
- // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
- // the higher part is also needed.
- SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- ReducedVT, NewN0, NewN1);
-
- // Repack the lower part and higher part result of mul into a wider
- // result.
- // Generate shuffle functioning as punpcklwd.
- SmallVector<int, 16> ShuffleMask(NumElts);
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
- ShuffleMask[2 * i] = i;
- ShuffleMask[2 * i + 1] = i + NumElts;
- }
- SDValue ResLo =
- DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
- ResLo = DAG.getBitcast(ResVT, ResLo);
- // Generate shuffle functioning as punpckhwd.
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
- ShuffleMask[2 * i] = i + NumElts / 2;
- ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
- }
- SDValue ResHi =
- DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
- ResHi = DAG.getBitcast(ResVT, ResHi);
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
- }
- } else {
- // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
- // to legalize the mul explicitly because implicit legalization for type
- // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
- // instructions which will not exist when we explicitly legalize it by
- // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
- // <4 x i16> undef).
- //
- // Legalize the operands of mul.
- // FIXME: We may be able to handle non-concatenated vectors by insertion.
- unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
- if ((RegSize % ReducedSizeInBits) != 0)
- return SDValue();
- SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
- DAG.getUNDEF(ReducedVT));
- Ops[0] = NewN0;
- NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
- Ops[0] = NewN1;
- NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
-
- if (Mode == MULU8 || Mode == MULS8) {
- // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
- // part is needed.
- SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
-
- // convert the type of mul result to VT.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
- SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG,
- DL, ResVT, Mul);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
- } else {
- // Generate the lower and higher part of mul: pmulhw/pmulhuw. For
- // MULU16/MULS16, both parts are needed.
- SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
- SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- OpsVT, NewN0, NewN1);
-
- // Repack the lower part and higher part result of mul into a wider
- // result. Make sure the type of mul result is VT.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
- SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi);
- Res = DAG.getBitcast(ResVT, Res);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
- }
+ MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
+ // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
+ // the higher part is also needed.
+ SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
+ ReducedVT, NewN0, NewN1);
+
+ // Repack the lower part and higher part result of mul into a wider
+ // result.
+ // Generate shuffle functioning as punpcklwd.
+ SmallVector<int, 16> ShuffleMask(NumElts);
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+ ShuffleMask[2 * i] = i;
+ ShuffleMask[2 * i + 1] = i + NumElts;
+ }
+ SDValue ResLo =
+ DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+ ResLo = DAG.getBitcast(ResVT, ResLo);
+ // Generate shuffle functioning as punpckhwd.
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+ ShuffleMask[2 * i] = i + NumElts / 2;
+ ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
+ }
+ SDValue ResHi =
+ DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+ ResHi = DAG.getBitcast(ResVT, ResHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
+ }
+
+ // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
+ // to legalize the mul explicitly because implicit legalization for type
+ // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
+ // instructions which will not exist when we explicitly legalize it by
+ // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
+ // <4 x i16> undef).
+ //
+ // Legalize the operands of mul.
+ // FIXME: We may be able to handle non-concatenated vectors by insertion.
+ unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
+ if ((RegSize % ReducedSizeInBits) != 0)
+ return SDValue();
+
+ SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
+ DAG.getUNDEF(ReducedVT));
+ Ops[0] = NewN0;
+ NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
+ Ops[0] = NewN1;
+ NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
+
+ if (Mode == MULU8 || Mode == MULS8) {
+ // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
+ // part is needed.
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
+
+ // convert the type of mul result to VT.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
+ SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
+ : ISD::SIGN_EXTEND_VECTOR_INREG,
+ DL, ResVT, Mul);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
}
+
+ // Generate the lower and higher part of mul: pmulhw/pmulhuw. For
+ // MULU16/MULS16, both parts are needed.
+ SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
+ SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
+ OpsVT, NewN0, NewN1);
+
+ // Repack the lower part and higher part result of mul into a wider
+ // result. Make sure the type of mul result is VT.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
+ SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi);
+ Res = DAG.getBitcast(ResVT, Res);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
}
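
A hedged scalar sketch of what one lane of the pmullw/pmulhw path above computes, assuming both inputs fit in i16: the 32-bit product is reassembled from its low and high 16-bit halves, which is what the punpcklwd/punpckhwd repack does across the whole vector.

#include <cstdint>

static int32_t mulViaPmullwPmulhw(int16_t a, int16_t b) {
  int32_t prod = a * b;                                                   // full product fits in i32
  uint16_t lo = static_cast<uint16_t>(prod);                              // pmullw lane
  uint16_t hi = static_cast<uint16_t>(static_cast<uint32_t>(prod) >> 16); // pmulhw lane
  uint32_t repacked = (static_cast<uint32_t>(hi) << 16) | lo;             // unpack-style interleave
  return static_cast<int32_t>(repacked);                                  // equals prod
}
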
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
@@ -33781,13 +35241,13 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
}
// If the upper 17 bits of each element are zero then we can use PMADDWD,
-// which is always at least as quick as PMULLD, expect on KNL.
+// which is always at least as quick as PMULLD, except on KNL.
static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasSSE2())
return SDValue();
- if (Subtarget.getProcFamily() == X86Subtarget::IntelKNL)
+ if (Subtarget.isPMADDWDSlow())
return SDValue();
EVT VT = N->getValueType(0);
@@ -33797,12 +35257,24 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Make sure the vXi16 type is legal. This covers the AVX512 without BWI case.
+ // Also allow v2i32 if it will be widened.
MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());
- if (!DAG.getTargetLoweringInfo().isTypeLegal(WVT))
+ if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) ||
+ DAG.getTargetLoweringInfo().isTypeLegal(WVT)))
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+
+ // If we are zero extending two steps without SSE4.1, it's better to reduce
+ // the vmul width instead.
+ if (!Subtarget.hasSSE41() &&
+ (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
+ (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() <= 8))
+ return SDValue();
+
APInt Mask17 = APInt::getHighBitsSet(32, 17);
if (!DAG.MaskedValueIsZero(N1, Mask17) ||
!DAG.MaskedValueIsZero(N0, Mask17))
@@ -33828,7 +35300,8 @@ static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
// Only support vXi64 vectors.
if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 ||
- !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ VT.getVectorNumElements() < 2 ||
+ !isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
SDValue N0 = N->getOperand(0);
@@ -33929,10 +35402,12 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
(SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
if (isPowerOf2_64(MulAmt2) &&
- !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+ !(SignMulAmt >= 0 && N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::ADD))
// If second multiplier is pow2, issue it first. We want the multiply by
// 3, 5, or 9 to be folded into the addressing mode unless the lone use
- // is an add.
+ // is an add. Only do this for positive multiply amounts since the
+ // negate would prevent it from being used as an address mode anyway.
std::swap(MulAmt1, MulAmt2);
if (isPowerOf2_64(MulAmt1))
@@ -34197,6 +35672,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
"Unexpected PACKSS/PACKUS input type");
+ bool IsSigned = (X86ISD::PACKSS == Opcode);
+
// Constant Folding.
APInt UndefElts0, UndefElts1;
SmallVector<APInt, 32> EltBits0, EltBits1;
@@ -34209,7 +35686,6 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
unsigned NumSrcElts = NumDstElts / 2;
unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
- bool IsSigned = (X86ISD::PACKSS == Opcode);
APInt Undefs(NumDstElts, 0);
SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
@@ -34253,16 +35729,58 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
}
+ // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
+ // truncate to create a larger truncate.
+ if (Subtarget.hasAVX512() &&
+ N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
+ N0.getOperand(0).getValueType() == MVT::v8i32) {
+ if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
+ (!IsSigned &&
+ DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) {
+ if (Subtarget.hasVLX())
+ return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ // Widen input to v16i32 so we can truncate that.
+ SDLoc dl(N);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32,
+ N0.getOperand(0), DAG.getUNDEF(MVT::v8i32));
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat);
+ }
+ }
+
// Attempt to combine as shuffle.
SDValue Op(N, 0);
if (SDValue Res =
combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
return SDValue();
}
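
A minimal scalar model (an assumption, not from the patch) of why a PACKUS can serve as the truncate being combined above: packus saturates each lane to [0, 255], so when MaskedValueIsZero proves the upper byte is zero the saturation never fires and the pack is an exact truncation.

#include <algorithm>
#include <cstdint>

static uint8_t packusLane(uint16_t v) {
  // Precondition assumed: the high byte of v is known zero, i.e. v <= 255.
  return static_cast<uint8_t>(std::min<uint16_t>(v, 255)); // saturate == plain truncate here
}
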
+static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() ||
+ X86ISD::VSRL == N->getOpcode()) &&
+ "Unexpected shift opcode");
+ EVT VT = N->getValueType(0);
+
+ // Shift zero -> zero.
+ if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -34277,13 +35795,14 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
"Unexpected value type");
+ assert(N1.getValueType() == MVT::i8 && "Unexpected shift amount type");
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
- APInt ShiftVal = cast<ConstantSDNode>(N1)->getAPIntValue();
- if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) {
+ unsigned ShiftVal = cast<ConstantSDNode>(N1)->getZExtValue();
+ if (ShiftVal >= NumBitsPerElt) {
if (LogicalShift)
- return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+ return DAG.getConstant(0, SDLoc(N), VT);
else
ShiftVal = NumBitsPerElt - 1;
}
@@ -34294,30 +35813,25 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
// Shift zero -> zero.
if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
-
- // fold (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31).
- // This VSRLI only looks at the sign bit, which is unmodified by VSRAI.
- // TODO - support other sra opcodes as needed.
- if (Opcode == X86ISD::VSRLI && (ShiftVal + 1) == NumBitsPerElt &&
- N0.getOpcode() == X86ISD::VSRAI)
- return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1);
-
- // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
- if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSHLI &&
- N1 == N0.getOperand(1)) {
- SDValue N00 = N0.getOperand(0);
- unsigned NumSignBits = DAG.ComputeNumSignBits(N00);
- if (ShiftVal.ult(NumSignBits))
- return N00;
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // Fold (VSRAI (VSRAI X, C1), C2) --> (VSRAI X, (C1 + C2)) with (C1 + C2)
+ // clamped to (NumBitsPerElt - 1).
+ if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSRAI) {
+ unsigned ShiftVal2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ unsigned NewShiftVal = ShiftVal + ShiftVal2;
+ if (NewShiftVal >= NumBitsPerElt)
+ NewShiftVal = NumBitsPerElt - 1;
+ return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8));
}
// We can decode 'whole byte' logical bit shifts as shuffles.
- if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) {
+ if (LogicalShift && (ShiftVal % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
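
A hedged scalar model of the VSRAI(VSRAI) fold a few lines above, shown for an i16 lane: two arithmetic right shifts merge into one, with the combined amount clamped to the element width minus one.

#include <algorithm>
#include <cstdint>

static int16_t foldSraSra(int16_t x, unsigned a, unsigned b) {
  // (x >>s a) >>s b  ==  x >>s min(a + b, 15) for in-range a and b.
  unsigned total = std::min(a + b, 15u);
  return static_cast<int16_t>(x >> total); // arithmetic shift of the promoted value
}
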
@@ -34328,18 +35842,22 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
assert(EltBits.size() == VT.getVectorNumElements() &&
"Unexpected shift value type");
- unsigned ShiftImm = ShiftVal.getZExtValue();
for (APInt &Elt : EltBits) {
if (X86ISD::VSHLI == Opcode)
- Elt <<= ShiftImm;
+ Elt <<= ShiftVal;
else if (X86ISD::VSRAI == Opcode)
- Elt.ashrInPlace(ShiftImm);
+ Elt.ashrInPlace(ShiftVal);
else
- Elt.lshrInPlace(ShiftImm);
+ Elt.lshrInPlace(ShiftVal);
}
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
}
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+ APInt::getAllOnesValue(NumBitsPerElt), DCI))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -34356,7 +35874,8 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res =
combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
return SDValue();
@@ -34468,42 +35987,31 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// Try to match (and (xor X, -1), Y) logic pattern for (andnp X, Y) combines.
-static bool matchANDXORWithAllOnesAsANDNP(SDNode *N, SDValue &X, SDValue &Y) {
- if (N->getOpcode() != ISD::AND)
- return false;
+/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
+static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::AND);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ MVT VT = N->getSimpleValueType(0);
+ if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
+ return SDValue();
+
+ SDValue X, Y;
+ SDValue N0 = peekThroughBitcasts(N->getOperand(0));
+ SDValue N1 = peekThroughBitcasts(N->getOperand(1));
if (N0.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) {
X = N0.getOperand(0);
Y = N1;
- return true;
- }
- if (N1.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) {
+ } else if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) {
X = N1.getOperand(0);
Y = N0;
- return true;
- }
-
- return false;
-}
-
-/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
-static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
- assert(N->getOpcode() == ISD::AND);
-
- EVT VT = N->getValueType(0);
- if (VT != MVT::v2i64 && VT != MVT::v4i64 && VT != MVT::v8i64)
+ } else
return SDValue();
- SDValue X, Y;
- if (matchANDXORWithAllOnesAsANDNP(N, X, Y))
- return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);
-
- return SDValue();
+ X = DAG.getBitcast(VT, X);
+ Y = DAG.getBitcast(VT, Y);
+ return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);
}
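
The scalar shape of the pattern now folded directly to ANDNP for any 128/256/512-bit vector type (a sketch for reference only):

#include <cstdint>

static uint32_t andnp(uint32_t x, uint32_t y) {
  return ~x & y; // (and (xor x, -1), y) -> (andnp x, y)
}
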
// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
@@ -34512,8 +36020,8 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
// some of the transition sequences.
// Even with AVX-512 this is still useful for removing casts around logical
// operations on vXi1 mask types.
-static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
assert(VT.isVector() && "Expected vector type");
@@ -34628,6 +36136,10 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
!SplatVal.isMask())
return SDValue();
+ // Don't prevent creation of ANDN.
+ if (isBitwiseNot(Op0))
+ return SDValue();
+
if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
return SDValue();
@@ -34761,6 +36273,73 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
return SDValue();
}
+// Look for (and (ctpop X), 1) which is the IR form of __builtin_parity.
+// Turn it into series of XORs and a setnp.
+static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+
+ // We only support 64-bit and 32-bit. 64-bit requires special handling
+ // unless the 64-bit popcnt instruction is legal.
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT))
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // LHS needs to be a single use CTPOP.
+ if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse())
+ return SDValue();
+
+ // RHS needs to be 1.
+ if (!isOneConstant(N1))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue X = N0.getOperand(0);
+
+ // If this is 64-bit, it's always best to xor the two 32-bit pieces together
+ // even if we have popcnt.
+ if (VT == MVT::i64) {
+ SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
+ DAG.getNode(ISD::SRL, DL, VT, X,
+ DAG.getConstant(32, DL, MVT::i8)));
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+ X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi);
+ // Generate a 32-bit parity idiom. This will bring us back here if we need
+ // to expand it too.
+ SDValue Parity = DAG.getNode(ISD::AND, DL, MVT::i32,
+ DAG.getNode(ISD::CTPOP, DL, MVT::i32, X),
+ DAG.getConstant(1, DL, MVT::i32));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Parity);
+ }
+ assert(VT == MVT::i32 && "Unexpected VT!");
+
+ // Xor the high and low 16-bits together using a 32-bit operation.
+ SDValue Hi16 = DAG.getNode(ISD::SRL, DL, VT, X,
+ DAG.getConstant(16, DL, MVT::i8));
+ X = DAG.getNode(ISD::XOR, DL, VT, X, Hi16);
+
+ // Finally xor the low 2 bytes together and use an 8-bit flag-setting xor.
+ // This should allow an h-reg to be used to save a shift.
+ // FIXME: We only get an h-reg in 32-bit mode.
+ SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
+ DAG.getNode(ISD::SRL, DL, VT, X,
+ DAG.getConstant(8, DL, MVT::i8)));
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
+ SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32);
+ SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1);
+
+ // Copy the inverse of the parity flag into a register with setcc.
+ SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
+ // Zero extend to original type.
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), Setnp);
+}
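
An assumed scalar model of the lowering combineParity builds: xor-fold the word onto itself until one byte carries the parity of every bit, which is what the final flag-setting 8-bit xor plus SETNP reads off.

#include <cstdint>

static uint32_t parity32(uint32_t x) {
  x ^= x >> 16;                                    // xor the two 16-bit halves
  uint8_t b = static_cast<uint8_t>(x ^ (x >> 8));  // 8-bit xor of the low two bytes (sets PF)
  b ^= b >> 4;                                     // the remaining folds model reading PF
  b ^= b >> 2;
  b ^= b >> 1;
  return b & 1;                                    // 1 iff the original value has odd popcount
}
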
+
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -34788,6 +36367,10 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
}
}
+ // This must be done before legalization has expanded the ctpop.
+ if (SDValue V = combineParity(N, DAG, Subtarget))
+ return V;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -34811,7 +36394,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -34848,7 +36431,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue Shuffle = combineX86ShufflesRecursively(
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
N->getOperand(0).getOperand(1));
}
@@ -34978,7 +36561,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasSSE41())
return SDValue();
- MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
+ MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8;
X = DAG.getBitcast(BlendVT, X);
Y = DAG.getBitcast(BlendVT, Y);
@@ -35122,11 +36705,21 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
return R;
+ // Attempt to recursively combine an OR of shuffles.
+ if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+ SDValue Op(N, 0);
+ if (SDValue Res = combineX86ShufflesRecursively(
+ {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+ return Res;
+ }
+
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ unsigned Bits = VT.getScalarSizeInBits();
// SHLD/SHRD instructions have lower register pressure, but on some
// platforms they have higher latency than the equivalent
@@ -35149,6 +36742,23 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
SDValue ShAmt1 = N1.getOperand(1);
if (ShAmt1.getValueType() != MVT::i8)
return SDValue();
+
+ // Peek through any modulo shift masks.
+ SDValue ShMsk0;
+ if (ShAmt0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt0.getOperand(1)) &&
+ ShAmt0.getConstantOperandVal(1) == (Bits - 1)) {
+ ShMsk0 = ShAmt0;
+ ShAmt0 = ShAmt0.getOperand(0);
+ }
+ SDValue ShMsk1;
+ if (ShAmt1.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt1.getOperand(1)) &&
+ ShAmt1.getConstantOperandVal(1) == (Bits - 1)) {
+ ShMsk1 = ShAmt1;
+ ShAmt1 = ShAmt1.getOperand(0);
+ }
+
if (ShAmt0.getOpcode() == ISD::TRUNCATE)
ShAmt0 = ShAmt0.getOperand(0);
if (ShAmt1.getOpcode() == ISD::TRUNCATE)
@@ -35163,27 +36773,29 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
Opc = X86ISD::SHRD;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
+ std::swap(ShMsk0, ShMsk1);
}
// OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C )
// OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C )
// OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C )
// OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C )
- unsigned Bits = VT.getSizeInBits();
+ // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> SHLD( X, Y, C )
+ // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> SHRD( X, Y, C )
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
- if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+ if (auto *SumC = dyn_cast<ConstantSDNode>(Sum)) {
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
- if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
- return DAG.getNode(Opc, DL, VT,
- Op0, Op1,
- DAG.getNode(ISD::TRUNCATE, DL,
- MVT::i8, ShAmt0));
- }
- } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
- ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+ if ((SumC->getAPIntValue() == Bits ||
+ (SumC->getAPIntValue() == 0 && ShMsk1)) &&
+ ShAmt1Op1 == ShAmt0)
+ return DAG.getNode(Opc, DL, VT, Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ }
+ } else if (auto *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+ auto *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
return DAG.getNode(Opc, DL, VT,
N0.getOperand(0), N1.getOperand(0),
@@ -35191,12 +36803,13 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
MVT::i8, ShAmt0));
} else if (ShAmt1.getOpcode() == ISD::XOR) {
SDValue Mask = ShAmt1.getOperand(1);
- if (ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
+ if (auto *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL);
SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
ShAmt1Op0 = ShAmt1Op0.getOperand(0);
- if (MaskC->getSExtValue() == (Bits - 1) && ShAmt1Op0 == ShAmt0) {
+ if (MaskC->getSExtValue() == (Bits - 1) &&
+ (ShAmt1Op0 == ShAmt0 || ShAmt1Op0 == ShMsk0)) {
if (Op1.getOpcode() == InnerShift &&
isa<ConstantSDNode>(Op1.getOperand(1)) &&
Op1.getConstantOperandVal(1) == 1) {
@@ -35207,7 +36820,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
Op1.getOperand(0) == Op1.getOperand(1)) {
return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
- DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
}
}
}
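
A hedged scalar sketch of the double-shift shape targeted above, including the modulo mask that the new ShMsk0/ShMsk1 peek-through accepts; the explicit c == 0 guard keeps the expression defined in C++ and matches SHLD leaving the destination unchanged for a zero count.

#include <cstdint>

static uint32_t shld32(uint32_t x, uint32_t y, unsigned c) {
  c &= 31;                            // the modulo mask peeked through above
  if (c == 0)
    return x;
  return (x << c) | (y >> (32 - c));  // OR(SHL(X, C), SRL(Y, 32 - C)) -> SHLD(X, Y, C)
}
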
@@ -35478,6 +37091,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
}
if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) &&
+ !Subtarget.hasAVX512() &&
(SVT == MVT::i8 || SVT == MVT::i16) &&
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
if (auto USatVal = detectSSatPattern(In, VT, true)) {
@@ -35514,7 +37128,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
EVT ScalarVT = VT.getVectorElementType();
if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
- isPowerOf2_32(NumElems)))
+ NumElems >= 2 && isPowerOf2_32(NumElems)))
return SDValue();
// InScalarVT is the intermediate type in AVG pattern and it should be greater
@@ -35752,8 +37366,8 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
Alignment, ML->getMemOperand()->getFlags());
// Insert the loaded element into the appropriate place in the vector.
- SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, ML->getSrc0(),
- Load, VecIndex);
+ SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+ ML->getPassThru(), Load, VecIndex);
return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
}
@@ -35776,7 +37390,8 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
if (LoadFirstElt && LoadLastElt) {
SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
ML->getMemOperand());
- SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, ML->getSrc0());
+ SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,
+ ML->getPassThru());
return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
}
@@ -35786,7 +37401,7 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
// Don't try this if the pass-through operand is already undefined. That would
// cause an infinite loop because that's what we're about to create.
- if (ML->getSrc0().isUndef())
+ if (ML->getPassThru().isUndef())
return SDValue();
// The new masked load has an undef pass-through operand. The select uses the
@@ -35795,7 +37410,8 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
ML->getMask(), DAG.getUNDEF(VT),
ML->getMemoryVT(), ML->getMemOperand(),
ML->getExtensionType());
- SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getSrc0());
+ SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
+ ML->getPassThru());
return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
}
@@ -35842,9 +37458,9 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
LdVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- // Convert Src0 value.
- SDValue WideSrc0 = DAG.getBitcast(WideVecVT, Mld->getSrc0());
- if (!Mld->getSrc0().isUndef()) {
+ // Convert PassThru value.
+ SDValue WidePassThru = DAG.getBitcast(WideVecVT, Mld->getPassThru());
+ if (!Mld->getPassThru().isUndef()) {
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
@@ -35852,7 +37468,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
// Can't shuffle using an illegal type.
assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
"WideVecVT should be legal");
- WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
+ WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru,
DAG.getUNDEF(WideVecVT), ShuffleVec);
}
@@ -35885,10 +37501,10 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
}
SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
- Mld->getBasePtr(), NewMask, WideSrc0,
+ Mld->getBasePtr(), NewMask, WidePassThru,
Mld->getMemoryVT(), Mld->getMemOperand(),
ISD::NON_EXTLOAD);
- SDValue NewVec = getExtendInVec(X86ISD::VSEXT, dl, VT, WideLd, DAG);
+ SDValue NewVec = getExtendInVec(/*Signed*/true, dl, VT, WideLd, DAG);
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
}
@@ -35920,31 +37536,25 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
}
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
-
if (Mst->isCompressingStore())
return SDValue();
+ EVT VT = Mst->getValue().getValueType();
if (!Mst->isTruncatingStore()) {
if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
return ScalarStore;
- // If the mask is checking (0 > X), we're creating a vector with all-zeros
- // or all-ones elements based on the sign bits of X. AVX1 masked store only
- // cares about the sign bit of each mask element, so eliminate the compare:
- // mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X
- // Note that by waiting to match an x86-specific PCMPGT node, we're
- // eliminating potentially more complex matching of a setcc node which has
- // a full range of predicates.
+ // If the mask value has been legalized to a non-boolean vector, try to
+ // simplify ops leading up to it. We only demand the MSB of each lane.
SDValue Mask = Mst->getMask();
- if (Mask.getOpcode() == X86ISD::PCMPGT &&
- ISD::isBuildVectorAllZeros(Mask.getOperand(0).getNode())) {
- assert(Mask.getValueType() == Mask.getOperand(1).getValueType() &&
- "Unexpected type for PCMPGT");
- return DAG.getMaskedStore(
- Mst->getChain(), SDLoc(N), Mst->getValue(), Mst->getBasePtr(),
- Mask.getOperand(1), Mst->getMemoryVT(), Mst->getMemOperand());
+ if (Mask.getScalarValueSizeInBits() != 1) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+ return SDValue(N, 0);
}
// TODO: AVX512 targets should also be able to simplify something like the
@@ -35955,7 +37565,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
}
// Resolve truncating stores.
- EVT VT = Mst->getValue().getValueType();
unsigned NumElems = VT.getVectorNumElements();
EVT StVT = Mst->getMemoryVT();
SDLoc dl(Mst);
@@ -36043,6 +37652,18 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Convert a store of vXi1 into a store of iX and a bitcast.
+ if (!Subtarget.hasAVX512() && VT == StVT && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1) {
+
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
+ StoredVal = DAG.getBitcast(NewVT, StoredVal);
+
+ return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
+ }
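
An assumed illustration of the new vXi1 store path: without AVX-512 mask registers, an <8 x i1> value is stored as the single i8 it bitcasts to, with lane i becoming bit i.

#include <array>
#include <cstdint>

static uint8_t packMaskByte(const std::array<bool, 8> &lanes) {
  uint8_t bits = 0;
  for (unsigned i = 0; i != 8; ++i)
    bits |= static_cast<uint8_t>(lanes[i]) << i; // lane i -> bit i of the stored byte
  return bits;
}
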
+
// If this is a store of a scalar_to_vector to v1i1, just use a scalar store.
// This will avoid a copy to k-register.
if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
@@ -36269,7 +37890,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget.is64Bit() || F64IsLegal) {
- MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
+ MVT LdVT = (Subtarget.is64Bit() &&
+ (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getMemOperand());
@@ -36343,10 +37965,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
-/// Note that the binary operation should have the property that if one of the
-/// operands is UNDEF then the result is UNDEF.
static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
- // Look for the following pattern: if
+ // If either operand is undef, bail out. The binop should be simplified.
+ if (LHS.isUndef() || RHS.isUndef())
+ return false;
+
+ // Look for the following pattern:
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
// and
@@ -36361,25 +37985,15 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
return false;
MVT VT = LHS.getSimpleValueType();
-
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
- // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
- // operate independently on 128-bit lanes.
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts / NumLanes;
- assert((NumLaneElts % 2 == 0) &&
- "Vector type should have an even number of elements in each lane");
- unsigned HalfLaneElts = NumLaneElts/2;
-
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
- // If LHS is not a shuffle then pretend it is the shuffle
+ // If LHS is not a shuffle, then pretend it is the identity shuffle:
// LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
- // NOTE: in what follows a default initialized SDValue represents an UNDEF of
- // type VT.
+ // NOTE: A default initialized SDValue represents an UNDEF of type VT.
+ unsigned NumElts = VT.getVectorNumElements();
SDValue A, B;
SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
@@ -36388,10 +38002,9 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
if (!LHS.getOperand(1).isUndef())
B = LHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
- std::copy(Mask.begin(), Mask.end(), LMask.begin());
+ llvm::copy(Mask, LMask.begin());
} else {
- if (!LHS.isUndef())
- A = LHS;
+ A = LHS;
for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
@@ -36406,45 +38019,51 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
if (!RHS.getOperand(1).isUndef())
D = RHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
- std::copy(Mask.begin(), Mask.end(), RMask.begin());
+ llvm::copy(Mask, RMask.begin());
} else {
- if (!RHS.isUndef())
- C = RHS;
+ C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
+ // If A and B occur in reverse order in RHS, then canonicalize by commuting
+ // RHS operands and shuffle mask.
+ if (A != C) {
+ std::swap(C, D);
+ ShuffleVectorSDNode::commuteMask(RMask);
+ }
// Check that the shuffles are both shuffling the same vectors.
- if (!(A == C && B == D) && !(A == D && B == C))
- return false;
-
- // If everything is UNDEF then bail out: it would be better to fold to UNDEF.
- if (!A.getNode() && !B.getNode())
+ if (!(A == C && B == D))
return false;
- // If A and B occur in reverse order in RHS, then "swap" them (which means
- // rewriting the mask).
- if (A != C)
- ShuffleVectorSDNode::commuteMask(RMask);
-
- // At this point LHS and RHS are equivalent to
- // LHS = VECTOR_SHUFFLE A, B, LMask
- // RHS = VECTOR_SHUFFLE A, B, RMask
+ // LHS and RHS are now:
+ // LHS = shuffle A, B, LMask
+ // RHS = shuffle A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0; i != NumLaneElts; ++i) {
- int LIdx = LMask[i+l], RIdx = RMask[i+l];
-
- // Ignore any UNDEF components.
+ // AVX defines horizontal add/sub to operate independently on 128-bit lanes,
+ // so we just repeat the inner loop if this is a 256-bit op.
+ unsigned Num128BitChunks = VT.getSizeInBits() / 128;
+ unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks;
+ assert((NumEltsPer128BitChunk % 2 == 0) &&
+ "Vector type should have an even number of elements in each lane");
+ for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
+ for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
+ // Ignore undefined components.
+ int LIdx = LMask[i + j], RIdx = RMask[i + j];
if (LIdx < 0 || RIdx < 0 ||
(!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
(!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
- // Check that successive elements are being operated on. If not, this is
+ // The low half of the 128-bit result must choose from A.
+ // The high half of the 128-bit result must choose from B,
+ // unless B is undef. In that case, we are always choosing from A.
+ unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
+ unsigned Src = B.getNode() ? i >= NumEltsPer64BitChunk : 0;
+
+ // Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
- unsigned Src = (i/HalfLaneElts); // each lane is split between srcs
- int Index = 2*(i%HalfLaneElts) + NumElts*Src + l;
+ int Index = 2 * (i % NumEltsPer64BitChunk) + NumElts * Src + j;
if (!(LIdx == Index && RIdx == Index + 1) &&
!(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
@@ -36463,21 +38082,24 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
bool IsFadd = N->getOpcode() == ISD::FADD;
+ auto HorizOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB;
assert((IsFadd || N->getOpcode() == ISD::FSUB) && "Wrong opcode");
// Try to synthesize horizontal add/sub from adds/subs of shuffles.
if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
- isHorizontalBinOp(LHS, RHS, IsFadd)) {
- auto NewOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB;
- return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
- }
+ isHorizontalBinOp(LHS, RHS, IsFadd) &&
+ shouldUseHorizontalOp(LHS == RHS, DAG, Subtarget))
+ return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
+
return SDValue();
}
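
As a hedged reminder of the shuffle pair isHorizontalBinOp validates, one 128-bit chunk of a horizontal float add produces the following lanes (an illustration, not code from the patch):

#include <array>

static std::array<float, 4> haddps(const std::array<float, 4> &A,
                                   const std::array<float, 4> &B) {
  // The low half draws successive pairs from A, the high half from B.
  return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
}
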
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
+/// TODO: This overlaps with the generic combiner's visitTRUNCATE. Remove
+/// anything that is guaranteed to be transformed by DAGCombiner.
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
@@ -36489,34 +38111,20 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
- auto IsRepeatedOpOrFreeTruncation = [VT](SDValue Op0, SDValue Op1) {
+ auto IsFreeTruncation = [VT](SDValue Op) {
unsigned TruncSizeInBits = VT.getScalarSizeInBits();
- // Repeated operand, so we are only trading one output truncation for
- // one input truncation.
- if (Op0 == Op1)
- return true;
-
- // See if either operand has been extended from a smaller/equal size to
+ // See if this has been extended from a smaller/equal size to
// the truncation size, allowing a truncation to combine with the extend.
- unsigned Opcode0 = Op0.getOpcode();
- if ((Opcode0 == ISD::ANY_EXTEND || Opcode0 == ISD::SIGN_EXTEND ||
- Opcode0 == ISD::ZERO_EXTEND) &&
- Op0.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
- return true;
-
- unsigned Opcode1 = Op1.getOpcode();
- if ((Opcode1 == ISD::ANY_EXTEND || Opcode1 == ISD::SIGN_EXTEND ||
- Opcode1 == ISD::ZERO_EXTEND) &&
- Op1.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
+ unsigned Opcode = Op.getOpcode();
+ if ((Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND ||
+ Opcode == ISD::ZERO_EXTEND) &&
+ Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
return true;
- // See if either operand is a single use constant which can be constant
- // folded.
- SDValue BC0 = peekThroughOneUseBitcasts(Op0);
- SDValue BC1 = peekThroughOneUseBitcasts(Op1);
- return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
- ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
+ // See if this is a single use constant which can be constant folded.
+ SDValue BC = peekThroughOneUseBitcasts(Op);
+ return ISD::isBuildVectorOfConstantSDNodes(BC.getNode());
};
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
@@ -36526,7 +38134,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
};
// Don't combine if the operation has other uses.
- if (!N->isOnlyUserOf(Src.getNode()))
+ if (!Src.hasOneUse())
return SDValue();
// Only support vector truncation for now.
@@ -36544,7 +38152,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
- IsRepeatedOpOrFreeTruncation(Op0, Op1))
+ (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
return TruncateArithmetic(Op0, Op1);
break;
}
@@ -36557,11 +38165,20 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {
- // TODO: ISD::SUB should be here but interferes with combineSubToSubus.
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(Opcode, VT) &&
- IsRepeatedOpOrFreeTruncation(Op0, Op1))
+ (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
+ return TruncateArithmetic(Op0, Op1);
+ break;
+ }
+ case ISD::SUB: {
+ // TODO: For ISD::SUB we are conservative and require both sides to be freely
+ // truncatable to avoid interfering with combineSubToSubus.
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ if (TLI.isOperationLegal(Opcode, VT) &&
+ (Op0 == Op1 || (IsFreeTruncation(Op0) && IsFreeTruncation(Op1))))
return TruncateArithmetic(Op0, Op1);
break;
}
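
A minimal scalar sketch (assumed) of the push-down these cases perform: truncating after the arithmetic gives the same result as doing the arithmetic on already-truncated operands, which is why a free truncation on an operand makes the transform profitable.

#include <cstdint>

static uint8_t addThenTrunc(uint32_t x, uint32_t y) {
  return static_cast<uint8_t>(x + y);                   // trunc(add(x, y))
}

static uint8_t truncThenAdd(uint32_t x, uint32_t y) {
  return static_cast<uint8_t>(static_cast<uint8_t>(x) + // add(trunc(x), trunc(y)),
                              static_cast<uint8_t>(y)); // identical modulo 2^8
}
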
@@ -36701,8 +38318,7 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
// Use PACKUS if the input has zero-bits that extend all the way to the
// packed/truncated value. e.g. masks, zext_in_reg, etc.
- KnownBits Known;
- DAG.computeKnownBits(In, Known);
+ KnownBits Known = DAG.computeKnownBits(In);
unsigned NumLeadingZeroBits = Known.countMinLeadingZeros();
if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits))
return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget);
@@ -36733,9 +38349,11 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
if (!Subtarget.hasSSE2())
return SDValue();
- // Only handle vXi16 types that are at least 128-bits.
+ // Only handle vXi16 types that are at least 128-bits unless they will be
+ // widened.
if (!VT.isVector() || VT.getVectorElementType() != MVT::i16 ||
- VT.getVectorNumElements() < 8)
+ (!ExperimentalVectorWideningLegalization &&
+ VT.getVectorNumElements() < 8))
return SDValue();
// Input type should be vXi32.
@@ -36951,29 +38569,72 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
/// Returns the negated value if the node \p N flips sign of FP value.
///
-/// FP-negation node may have different forms: FNEG(x) or FXOR (x, 0x80000000).
+/// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000)
+/// or FSUB(0, x)
/// AVX512F does not have FXOR, so FNEG is lowered as
/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).
/// In this case we go though all bitcasts.
-static SDValue isFNEG(SDNode *N) {
+/// This also recognizes a splat of a negated value and returns the splat of that
+/// value.
+static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
if (N->getOpcode() == ISD::FNEG)
return N->getOperand(0);
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
- if (Op.getOpcode() != X86ISD::FXOR && Op.getOpcode() != ISD::XOR)
+ auto VT = Op->getValueType(0);
+ if (auto SVOp = dyn_cast<ShuffleVectorSDNode>(Op.getNode())) {
+ // For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate
+ // of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
+ if (!SVOp->getOperand(1).isUndef())
+ return SDValue();
+ if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode()))
+ return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
+ SVOp->getMask());
+ return SDValue();
+ }
+ unsigned Opc = Op.getOpcode();
+ if (Opc == ISD::INSERT_VECTOR_ELT) {
+ // Negate of INSERT_VECTOR_ELT(UNDEF, V, INDEX) is INSERT_VECTOR_ELT(UNDEF,
+ // -V, INDEX).
+ SDValue InsVector = Op.getOperand(0);
+ SDValue InsVal = Op.getOperand(1);
+ if (!InsVector.isUndef())
+ return SDValue();
+ if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode()))
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
+ NegInsVal, Op.getOperand(2));
+ return SDValue();
+ }
+
+ if (Opc != X86ISD::FXOR && Opc != ISD::XOR && Opc != ISD::FSUB)
return SDValue();
SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
if (!Op1.getValueType().isFloatingPoint())
return SDValue();
- // Extract constant bits and see if they are all sign bit masks.
+ SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
+
+ // For XOR and FXOR, we want to check if constant bits of Op1 are sign bit
+ // masks. For FSUB, we have to check if constant bits of Op0 are sign bit
+ // masks and hence we swap the operands.
+ if (Opc == ISD::FSUB)
+ std::swap(Op0, Op1);
+
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
+ // Extract constant bits and see if they are all sign bit masks. Ignore the
+ // undef elements.
if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
- UndefElts, EltBits, false, false))
- if (llvm::all_of(EltBits, [](APInt &I) { return I.isSignMask(); }))
- return peekThroughBitcasts(Op.getOperand(0));
+ UndefElts, EltBits,
+ /* AllowWholeUndefs */ true,
+ /* AllowPartialUndefs */ false)) {
+ for (unsigned I = 0, E = EltBits.size(); I < E; I++)
+ if (!UndefElts[I] && !EltBits[I].isSignMask())
+ return SDValue();
+
+ return peekThroughBitcasts(Op0);
+ }
return SDValue();
}
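
A hedged scalar model of the FXOR form isFNEG recognizes: xoring the IEEE sign bit negates a float bit-for-bit, which is why a constant made entirely of sign-bit masks (with undef lanes now allowed) qualifies.

#include <cstdint>
#include <cstring>

static float fnegViaXor(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits ^= 0x80000000u;                 // flip only the sign bit
  float r;
  std::memcpy(&r, &bits, sizeof(r));
  return r;                            // == -x, including for -0.0 and NaNs
}
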
@@ -36982,8 +38643,9 @@ static SDValue isFNEG(SDNode *N) {
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT OrigVT = N->getValueType(0);
- SDValue Arg = isFNEG(N);
- assert(Arg.getNode() && "N is expected to be an FNEG node");
+ SDValue Arg = isFNEG(DAG, N);
+ if (!Arg)
+ return SDValue();
EVT VT = Arg.getValueType();
EVT SVT = VT.getScalarType();
@@ -37033,25 +38695,27 @@ static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
// If we have integer vector types available, use the integer opcodes.
- if (VT.isVector() && Subtarget.hasSSE2()) {
- SDLoc dl(N);
+ if (!VT.isVector() || !Subtarget.hasSSE2())
+ return SDValue();
- MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+ SDLoc dl(N);
- SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
- SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
- unsigned IntOpcode;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected FP logic op");
- case X86ISD::FOR: IntOpcode = ISD::OR; break;
- case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
- case X86ISD::FAND: IntOpcode = ISD::AND; break;
- case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
- }
- SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
- return DAG.getBitcast(VT, IntOp);
+ unsigned IntBits = VT.getScalarSizeInBits();
+ MVT IntSVT = MVT::getIntegerVT(IntBits);
+ MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits);
+
+ SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
+ SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
+ unsigned IntOpcode;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected FP logic op");
+ case X86ISD::FOR: IntOpcode = ISD::OR; break;
+ case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+ case X86ISD::FAND: IntOpcode = ISD::AND; break;
+ case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
}
- return SDValue();
+ SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
+ return DAG.getBitcast(VT, IntOp);
}
@@ -37098,9 +38762,7 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
- if (isFNEG(N))
- return combineFneg(N, DAG, Subtarget);
- return SDValue();
+ return combineFneg(N, DAG, Subtarget);
}
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
@@ -37112,8 +38774,6 @@ static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
unsigned NumBits = VT.getSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
// TODO - Constant Folding.
if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
@@ -37127,12 +38787,9 @@ static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
}
// Only bottom 16-bits of the control bits are required.
- KnownBits Known;
APInt DemandedMask(APInt::getLowBitsSet(NumBits, 16));
- if (TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
+ if (TLI.SimplifyDemandedBits(Op1, DemandedMask, DCI))
return SDValue(N, 0);
- }
return SDValue();
}
@@ -37233,9 +38890,8 @@ static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
if (isNullFPScalarOrVectorConst(N->getOperand(1)))
return N->getOperand(0);
- if (isFNEG(N))
- if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
- return NewVal;
+ if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
+ return NewVal;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
@@ -37320,26 +38976,47 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
}
+static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
/// Do target-specific dag combines on X86ISD::ANDNP nodes.
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ MVT VT = N->getSimpleValueType(0);
+
// ANDNP(0, x) -> x
if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
return N->getOperand(1);
// ANDNP(x, 0) -> 0
if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
- return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N));
+ return DAG.getConstant(0, SDLoc(N), VT);
- EVT VT = N->getValueType(0);
+ // Turn ANDNP back to AND if input is inverted.
+ if (VT.isVector() && N->getOperand(0).getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N->getOperand(0).getOperand(1).getNode())) {
+ return DAG.getNode(ISD::AND, SDLoc(N), VT,
+ N->getOperand(0).getOperand(0), N->getOperand(1));
+ }
// Attempt to recursively combine a bitmask ANDNP with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -37502,36 +39179,6 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags);
}
-/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) ->
-/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y)
-/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly
-/// extends from AH (which we otherwise need to do contortions to access).
-static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) {
- SDValue N0 = N->getOperand(0);
- auto OpcodeN = N->getOpcode();
- auto OpcodeN0 = N0.getOpcode();
- if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) ||
- (OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM)))
- return SDValue();
-
- EVT VT = N->getValueType(0);
- EVT InVT = N0.getValueType();
- if (N0.getResNo() != 1 || InVT != MVT::i8 ||
- !(VT == MVT::i32 || VT == MVT::i64))
- return SDValue();
-
- SDVTList NodeTys = DAG.getVTList(MVT::i8, MVT::i32);
- auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG
- : X86ISD::UDIVREM8_ZEXT_HREG;
- SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0),
- N0.getOperand(1));
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
- // If this was a 64-bit extend, complete it.
- if (VT == MVT::i64)
- return DAG.getNode(OpcodeN, SDLoc(N), VT, R.getValue(1));
- return R.getValue(1);
-}
-
// If we face {ANY,SIGN,ZERO}_EXTEND that is applied to a CMOV with constant
// operands and the result of CMOV is not used anywhere else - promote CMOV
// itself instead of promoting its result. This could be beneficial, because:
@@ -37685,6 +39332,9 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ if (ExperimentalVectorWideningLegalization)
+ return SDValue();
+
unsigned Opcode = N->getOpcode();
if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
return SDValue();
@@ -37699,17 +39349,33 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
EVT InVT = N0.getValueType();
EVT InSVT = InVT.getScalarType();
+ // FIXME: Generic DAGCombiner previously had a bug that would cause a
+ // sign_extend of setcc to sometimes return the original node, tricking the
+ // combiner into thinking CombineTo was used, which prevented the target
+ // combines from running.
+ // Return early here to avoid regressions like this:
+ // (v4i32 (sext (v4i1 (setcc (v4i16)))))
+ // Becomes
+ // (v4i32 (sext_invec (v8i16 (concat (v4i16 (setcc (v4i16))), undef))))
+ // Type legalized to
+ // (v4i32 (sext_invec (v8i16 (trunc_invec (v4i32 (setcc (v4i32)))))))
+ // Leading to a packssdw+pmovsxwd
+ // We could write a DAG combine to fix this, but really we shouldn't be
+ // creating sext_invec that's forcing v8i16 into the DAG.
+ if (N0.getOpcode() == ISD::SETCC)
+ return SDValue();
+
// Input type must be a vector and we must be extending legal integer types.
- if (!VT.isVector())
+ if (!VT.isVector() || VT.getVectorNumElements() < 2)
return SDValue();
if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
return SDValue();
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
- // On AVX2+ targets, if the input/output types are both legal then we will be
- // able to use SIGN_EXTEND/ZERO_EXTEND directly.
- if (Subtarget.hasInt256() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ // If the input/output types are both legal then we have at least AVX1 and
+ // we will be able to use SIGN_EXTEND/ZERO_EXTEND directly.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
DAG.getTargetLoweringInfo().isTypeLegal(InVT))
return SDValue();
@@ -37737,16 +39403,16 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, DL));
}
- // If target-size is 128-bits (or 256-bits on AVX2 target), then convert to
+ // If target-size is 128-bits (or 256-bits on AVX target), then convert to
// ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT.
// Also use this if we don't have SSE41 to allow the legalizer do its job.
if (!Subtarget.hasSSE41() || VT.is128BitVector() ||
- (VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is256BitVector() && Subtarget.hasAVX()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs())) {
SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
- return Opcode == ISD::SIGN_EXTEND
- ? DAG.getSignExtendVectorInReg(ExOp, DL, VT)
- : DAG.getZeroExtendVectorInReg(ExOp, DL, VT);
+ Opcode = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG;
+ return DAG.getNode(Opcode, DL, VT, ExOp);
}
auto SplitAndExtendInReg = [&](unsigned SplitSize) {
@@ -37755,22 +39421,23 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
+ unsigned IROpc = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG;
+
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
DAG.getIntPtrConstant(Offset, DL));
SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
- SrcVec = Opcode == ISD::SIGN_EXTEND
- ? DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT)
- : DAG.getZeroExtendVectorInReg(SrcVec, DL, SubVT);
+ SrcVec = DAG.getNode(IROpc, DL, SubVT, SrcVec);
Opnds.push_back(SrcVec);
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
};
- // On pre-AVX2 targets, split into 128-bit nodes of
+ // On pre-AVX targets, split into 128-bit nodes of
// ISD::*_EXTEND_VECTOR_INREG.
- if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128))
+ if (!Subtarget.hasAVX() && !(VT.getSizeInBits() % 128))
return SplitAndExtendInReg(128);
// On pre-AVX512 targets, split into 256-bit nodes of
@@ -37832,9 +39499,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
EVT InVT = N0.getValueType();
SDLoc DL(N);
- if (SDValue DivRem8 = getDivRem8(N, DAG))
- return DivRem8;
-
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
return NewCMov;
@@ -37861,7 +39525,7 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
return V;
if (VT.isVector())
- if (SDValue R = WidenMaskArithmetic(N, DAG, Subtarget))
+ if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
return R;
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
@@ -37920,7 +39584,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
SDValue C = N->getOperand(2);
auto invertIfNegative = [&DAG](SDValue &V) {
- if (SDValue NegVal = isFNEG(V.getNode())) {
+ if (SDValue NegVal = isFNEG(DAG, V.getNode())) {
V = DAG.getBitcast(V.getValueType(), NegVal);
return true;
}
@@ -37928,7 +39592,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
// new extract from the FNEG input.
if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isNullConstant(V.getOperand(1))) {
- if (SDValue NegVal = isFNEG(V.getOperand(0).getNode())) {
+ if (SDValue NegVal = isFNEG(DAG, V.getOperand(0).getNode())) {
NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal);
V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
NegVal, V.getOperand(1));
@@ -37961,7 +39625,7 @@ static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
SDLoc dl(N);
EVT VT = N->getValueType(0);
- SDValue NegVal = isFNEG(N->getOperand(2).getNode());
+ SDValue NegVal = isFNEG(DAG, N->getOperand(2).getNode());
if (!NegVal)
return SDValue();
@@ -38032,12 +39696,9 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
return V;
if (VT.isVector())
- if (SDValue R = WidenMaskArithmetic(N, DAG, Subtarget))
+ if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
return R;
- if (SDValue DivRem8 = getDivRem8(N, DAG))
- return DivRem8;
-
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
@@ -38079,12 +39740,15 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
return SDValue();
// TODO: Use PXOR + PTEST for SSE4.1 or later?
- // TODO: Add support for AVX-512.
EVT VT = SetCC->getValueType(0);
SDLoc DL(SetCC);
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX2())) {
- EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
+ (OpSize == 256 && Subtarget.hasAVX2()) ||
+ (OpSize == 512 && Subtarget.useAVX512Regs())) {
+ EVT VecVT = OpSize == 512 ? MVT::v16i32 :
+ OpSize == 256 ? MVT::v32i8 :
+ MVT::v16i8;
+ EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT;
SDValue Cmp;
if (IsOrXorXorCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
@@ -38095,14 +39759,18 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1));
SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0));
SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1));
- SDValue Cmp1 = DAG.getSetCC(DL, VecVT, A, B, ISD::SETEQ);
- SDValue Cmp2 = DAG.getSetCC(DL, VecVT, C, D, ISD::SETEQ);
- Cmp = DAG.getNode(ISD::AND, DL, VecVT, Cmp1, Cmp2);
+ SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+ SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ);
+ Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2);
} else {
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
- Cmp = DAG.getSetCC(DL, VecVT, VecX, VecY, ISD::SETEQ);
+ Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
}
+ // For 512-bits we want to emit a setcc that will lower to kortest.
+ if (OpSize == 512)
+ return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp),
+ DAG.getConstant(0xFFFF, DL, MVT::i16), CC);
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
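The IsOrXorXorCCZero path above works because (A ^ B) | (C ^ D) is zero exactly when A == B and C == D, so two halves of a wide equality test can be OR-combined before a single PMOVMSKB (or, for 512 bits, the kortest-style setcc). A scalar sketch of the identity, in plain C++ with illustrative names:

#include <cassert>
#include <cstdint>

// (A ^ B) | (C ^ D) == 0  <=>  A == B && C == D.
bool pairsEqualCombined(uint64_t A, uint64_t B, uint64_t C, uint64_t D) {
  return ((A ^ B) | (C ^ D)) == 0;
}

int main() {
  assert(pairsEqualCombined(1, 1, 2, 2));
  assert(!pairsEqualCombined(1, 1, 2, 3));
  assert(!pairsEqualCombined(0, 5, 7, 7));
  return 0;
}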
@@ -38181,7 +39849,9 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
// NOTE: The element count check is to ignore operand types that need to
// go through type promotion to a 128-bit vector.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
- VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() > 4 &&
+ VT.getVectorElementType() == MVT::i1 &&
+ (ExperimentalVectorWideningLegalization ||
+ VT.getVectorNumElements() > 4) &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS,
@@ -38202,10 +39872,11 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue Src = N->getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = N->getSimpleValueType(0);
// Perform constant folding.
if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) {
- assert(N->getValueType(0) == MVT::i32 && "Unexpected result type");
+ assert(VT == MVT::i32 && "Unexpected result type");
APInt Imm(32, 0);
for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) {
SDValue In = Src.getOperand(Idx);
@@ -38213,20 +39884,53 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
cast<ConstantSDNode>(In)->getAPIntValue().isNegative())
Imm.setBit(Idx);
}
- return DAG.getConstant(Imm, SDLoc(N), N->getValueType(0));
+ return DAG.getConstant(Imm, SDLoc(N), VT);
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
+ // Look through int->fp bitcasts that don't change the element width.
+ if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse() &&
+ SrcVT.isFloatingPoint() &&
+ Src.getOperand(0).getValueType() ==
+ EVT(SrcVT).changeVectorElementTypeToInteger())
+ Src = Src.getOperand(0);
- // MOVMSK only uses the MSB from each vector element.
- KnownBits Known;
- APInt DemandedMask(APInt::getSignMask(SrcVT.getScalarSizeInBits()));
- if (TLI.SimplifyDemandedBits(Src, DemandedMask, Known, TLO)) {
- DCI.AddToWorklist(Src.getNode());
- DCI.CommitTargetLoweringOpt(TLO);
+ // Simplify the inputs.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
+
+ // Combine (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)).
+ // Only do this when the setcc input and output types are the same and the
+ // setcc and the 'and' node have a single use.
+ // FIXME: Support 256-bits with AVX1. The movmsk is split, but the and isn't.
+ APInt SplatVal;
+ if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
+ Src.getOperand(0).getValueType() == Src.getValueType() &&
+ cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETNE &&
+ ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
+ Src.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue And = Src.getOperand(0);
+ if (And.hasOneUse() &&
+ ISD::isConstantSplatVector(And.getOperand(1).getNode(), SplatVal) &&
+ SplatVal.isPowerOf2()) {
+ MVT VT = Src.getSimpleValueType();
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ unsigned ShAmt = BitWidth - SplatVal.logBase2() - 1;
+ SDLoc DL(And);
+ SDValue X = And.getOperand(0);
+ // If the element type is i8, we need to bitcast to i16 to use a legal
+ // shift. If we wait until lowering we end up with an extra and to keep
+ // bits from crossing the 8-bit element boundaries, but we don't care about
+ // that here.
+ if (VT.getVectorElementType() == MVT::i8) {
+ VT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
+ X = DAG.getBitcast(VT, X);
+ }
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
+ DAG.getConstant(ShAmt, DL, VT));
+ SDValue Cast = DAG.getBitcast(SrcVT, Shl);
+ return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), N->getValueType(0), Cast);
+ }
}
return SDValue();
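The new (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)) fold is sound because MOVMSK only reads each element's sign bit, and shifting bit C left by BitWidth - C - 1 places exactly that bit in the sign position. A per-element scalar sketch (plain C++, illustrative names):

#include <cassert>
#include <cstdint>

// Testing bit C for non-zero equals reading the sign bit after shifting that
// bit into the MSB -- which is all MOVMSK looks at per element.
bool viaTest(uint32_t X, unsigned C) { return (X & (1u << C)) != 0; }
bool viaShift(uint32_t X, unsigned C) {
  unsigned ShAmt = 31 - C;            // BitWidth - logBase2(1 << C) - 1
  return ((X << ShAmt) >> 31) != 0;   // sign bit of the shifted value
}

int main() {
  for (unsigned C = 0; C < 32; ++C)
    for (uint32_t X : {0u, 1u, 0x80000000u, 0xdeadbeefu, ~0u})
      assert(viaTest(X, C) == viaShift(X, C));
  return 0;
}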
@@ -38296,16 +40000,10 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
// With AVX2 we only demand the upper bit of the mask.
if (!Subtarget.hasAVX512()) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
SDValue Mask = N->getOperand(2);
- KnownBits Known;
APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
- if (TLI.SimplifyDemandedBits(Mask, DemandedMask, Known, TLO)) {
- DCI.AddToWorklist(Mask.getNode());
- DCI.CommitTargetLoweringOpt(TLO);
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
return SDValue(N, 0);
- }
}
return SDValue();
@@ -38396,7 +40094,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
- // UINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
+ // UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
// UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
// UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
@@ -38460,7 +40158,8 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
- if (!Subtarget.useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasX87() &&
+ Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT LdVT = Ld->getValueType(0);
@@ -38485,6 +40184,159 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static bool needCarryOrOverflowFlag(SDValue Flags) {
+ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
+
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+
+ X86::CondCode CC;
+ switch (User->getOpcode()) {
+ default:
+ // Be conservative.
+ return true;
+ case X86ISD::SETCC:
+ case X86ISD::SETCC_CARRY:
+ CC = (X86::CondCode)User->getConstantOperandVal(0);
+ break;
+ case X86ISD::BRCOND:
+ CC = (X86::CondCode)User->getConstantOperandVal(2);
+ break;
+ case X86ISD::CMOV:
+ CC = (X86::CondCode)User->getConstantOperandVal(2);
+ break;
+ }
+
+ switch (CC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool onlyZeroFlagUsed(SDValue Flags) {
+ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
+
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+
+ unsigned CCOpNo;
+ switch (User->getOpcode()) {
+ default:
+ // Be conservative.
+ return false;
+ case X86ISD::SETCC: CCOpNo = 0; break;
+ case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
+ case X86ISD::BRCOND: CCOpNo = 2; break;
+ case X86ISD::CMOV: CCOpNo = 2; break;
+ }
+
+ X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo);
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return false;
+ }
+
+ return true;
+}
+
+static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
+ // Only handle test patterns.
+ if (!isNullConstant(N->getOperand(1)))
+ return SDValue();
+
+ // If we have a CMP of a truncated binop, see if we can make a smaller binop
+ // and use its flags directly.
+ // TODO: Maybe we should try promoting compares that only use the zero flag
+ // first if we can prove the upper bits with computeKnownBits?
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ EVT VT = Op.getValueType();
+
+ // If we have a constant logical shift that's only used in a comparison
+ // against zero turn it into an equivalent AND. This allows turning it into
+ // a TEST instruction later.
+ if ((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) &&
+ Op.hasOneUse() && isa<ConstantSDNode>(Op.getOperand(1)) &&
+ onlyZeroFlagUsed(SDValue(N, 0))) {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ unsigned ShAmt = Op.getConstantOperandVal(1);
+ if (ShAmt < BitWidth) { // Avoid undefined shifts.
+ APInt Mask = Op.getOpcode() == ISD::SRL
+ ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
+ : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
+ if (Mask.isSignedIntN(32)) {
+ Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
+ DAG.getConstant(Mask, dl, VT));
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, dl, VT));
+ }
+ }
+ }
+
+
+ // Look for a truncate with a single use.
+ if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse())
+ return SDValue();
+
+ Op = Op.getOperand(0);
+
+ // Arithmetic op can only have one use.
+ if (!Op.hasOneUse())
+ return SDValue();
+
+ unsigned NewOpc;
+ switch (Op.getOpcode()) {
+ default: return SDValue();
+ case ISD::AND:
+ // Skip an 'and' with a constant operand. We have special handling for an
+ // 'and' with an immediate during isel to generate test instructions.
+ if (isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ NewOpc = X86ISD::AND;
+ break;
+ case ISD::OR: NewOpc = X86ISD::OR; break;
+ case ISD::XOR: NewOpc = X86ISD::XOR; break;
+ case ISD::ADD:
+ // If the carry or overflow flag is used, we can't truncate.
+ if (needCarryOrOverflowFlag(SDValue(N, 0)))
+ return SDValue();
+ NewOpc = X86ISD::ADD;
+ break;
+ case ISD::SUB:
+ // If the carry or overflow flag is used, we can't truncate.
+ if (needCarryOrOverflowFlag(SDValue(N, 0)))
+ return SDValue();
+ NewOpc = X86ISD::SUB;
+ break;
+ }
+
+ // We found an op we can narrow. Truncate its inputs.
+ SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1));
+
+ // Use a X86 specific opcode to avoid DAG combine messing with it.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ Op = DAG.getNode(NewOpc, dl, VTs, Op0, Op1);
+
+ // For AND, keep a CMP so that we can match the test pattern.
+ if (NewOpc == X86ISD::AND)
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, dl, VT));
+
+ // Return the flags.
+ return Op.getValue(1);
+}
+
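combineCMP above is justified by two scalar identities: a compare-with-zero of a truncated add/sub/logic result depends only on the narrow bits, so the op can be performed at the narrow width and its zero flag used directly; and a constant logical shift tested against zero is the same as masking the surviving bits, which later matches TEST. A plain C++ sketch of both (8-bit truncation and a 32-bit SRL assumed; names are illustrative):

#include <cassert>
#include <cstdint>

// Truncating before or after the add gives the same low 8 bits, so the zero
// test can use the narrowed op's flags.
bool narrowAddIsZero(uint32_t A, uint32_t B) {
  return uint8_t(uint8_t(A) + uint8_t(B)) == 0;
}
bool wideAddTruncIsZero(uint32_t A, uint32_t B) { return uint8_t(A + B) == 0; }

// (X >> ShAmt) == 0 is the same as testing the remaining high bits with an
// AND mask (getHighBitsSet(32, 32 - ShAmt) in the code above).
bool shiftIsZero(uint32_t X, unsigned ShAmt) { return (X >> ShAmt) == 0; }
bool maskIsZero(uint32_t X, unsigned ShAmt) {
  return (X & (~uint32_t(0) << ShAmt)) == 0;
}

int main() {
  for (uint32_t A : {0u, 1u, 255u, 256u, 0xffffffffu})
    for (uint32_t B : {0u, 1u, 255u, 0x100u, 0xffffff01u})
      assert(narrowAddIsZero(A, B) == wideAddTruncIsZero(A, B));
  for (unsigned Sh = 1; Sh < 32; ++Sh)
    for (uint32_t X : {0u, 1u, 0x80u, 0xdeadbeefu, ~0u})
      assert(shiftIsZero(X, Sh) == maskIsZero(X, Sh));
  return 0;
}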
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
MVT VT = N->getSimpleValueType(0);
@@ -38531,21 +40383,6 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit
-/// which is more useful than 0/1 in some cases.
-static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) {
- SDLoc DL(N);
- // "Condition code B" is also known as "the carry flag" (CF).
- SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8);
- SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS);
- MVT VT = N->getSimpleValueType(0);
- if (VT == MVT::i8)
- return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT));
-
- assert(VT == MVT::i1 && "Unexpected type for SETCC node");
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB);
-}
-
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
@@ -38616,13 +40453,11 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
}
if (CC == X86::COND_B) {
- // X + SETB Z --> X + (mask SBB Z, Z)
- // X - SETB Z --> X - (mask SBB Z, Z)
- // TODO: Produce ADC/SBB here directly and avoid SETCC_CARRY?
- SDValue SBB = materializeSBB(Y.getNode(), Y.getOperand(1), DAG);
- if (SBB.getValueSizeInBits() != VT.getSizeInBits())
- SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
- return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ // X + SETB Z --> adc X, 0
+ // X - SETB Z --> sbb X, 0
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), Y.getOperand(1));
}
if (CC == X86::COND_A) {
@@ -38640,10 +40475,9 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
- SDValue SBB = materializeSBB(Y.getNode(), NewEFLAGS, DAG);
- if (SBB.getValueSizeInBits() != VT.getSizeInBits())
- SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
- return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), NewEFLAGS);
}
}
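The COND_B case above now emits ADC/SBB with a zero immediate instead of materializing SETB and adding it; the value computed is unchanged, only the materialization of the carry differs. A scalar model of what "X + SETB Z" computes (plain C++; the point of the combine is that the backend keeps the carry in EFLAGS and emits adc X, 0):

#include <cassert>
#include <cstdint>

// X + SETB: add the carry of an unsigned compare as a 0/1 value. After the
// combine this becomes adc X, 0 feeding off the compare's EFLAGS.
uint32_t addSetB(uint32_t X, uint32_t A, uint32_t B) {
  uint32_t Carry = (A < B) ? 1u : 0u;  // SETB of (cmp A, B)
  return X + Carry;
}

int main() {
  assert(addSetB(10, 1, 2) == 11);
  assert(addSetB(10, 2, 1) == 10);
  return 0;
}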
@@ -38713,23 +40547,23 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasSSE2())
return SDValue();
- SDValue MulOp = N->getOperand(0);
- SDValue Phi = N->getOperand(1);
-
- if (MulOp.getOpcode() != ISD::MUL)
- std::swap(MulOp, Phi);
- if (MulOp.getOpcode() != ISD::MUL)
- return SDValue();
-
- ShrinkMode Mode;
- if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
- return SDValue();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// If the vector size is less than 128, or greater than the supported RegSize,
// do not use PMADD.
- if (VT.getVectorNumElements() < 8)
+ if (!VT.isVector() || VT.getVectorNumElements() < 8)
+ return SDValue();
+
+ if (Op0.getOpcode() != ISD::MUL)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ ShrinkMode Mode;
+ if (!canReduceVMulWidth(Op0.getNode(), DAG, Mode) || Mode == MULU16)
return SDValue();
SDLoc DL(N);
@@ -38738,22 +40572,34 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
VT.getVectorNumElements() / 2);
- // Shrink the operands of mul.
- SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0));
- SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1));
-
// Madd vector size is half of the original vector size
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops);
};
- SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 },
- PMADDWDBuilder);
- // Fill the rest of the output with 0
- SDValue Zero = getZeroVector(Madd.getSimpleValueType(), Subtarget, DAG, DL);
- SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero);
- return DAG.getNode(ISD::ADD, DL, VT, Concat, Phi);
+
+ auto BuildPMADDWD = [&](SDValue Mul) {
+ // Shrink the operands of mul.
+ SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(1));
+
+ SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 },
+ PMADDWDBuilder);
+ // Fill the rest of the output with 0
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd,
+ DAG.getConstant(0, DL, MAddVT));
+ };
+
+ Op0 = BuildPMADDWD(Op0);
+
+ // It's possible that Op1 is also a mul we can reduce.
+ if (Op1.getOpcode() == ISD::MUL &&
+ canReduceVMulWidth(Op1.getNode(), DAG, Mode) && Mode != MULU16) {
+ Op1 = BuildPMADDWD(Op1);
+ }
+
+ return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
}
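The restructured combineLoopMAddPattern now builds a PMADDWD for each multiply operand of the reduction add (when canReduceVMulWidth shows a signed 16-bit multiply suffices) and simply adds the results. PMADDWD multiplies adjacent signed 16-bit elements and sums each pair into one 32-bit lane; a scalar model of one output lane (plain C++, illustrative name):

#include <cassert>
#include <cstdint>

// One PMADDWD output lane: two signed 16x16 products summed into 32 bits.
int32_t pmaddwdLane(int16_t A0, int16_t A1, int16_t B0, int16_t B1) {
  return int32_t(A0) * int32_t(B0) + int32_t(A1) * int32_t(B1);
}

int main() {
  assert(pmaddwdLane(3, 4, 5, 6) == 3 * 5 + 4 * 6);
  assert(pmaddwdLane(-2, 7, 100, -1) == -207);
  return 0;
}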
static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
@@ -38786,45 +40632,53 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
// We know N is a reduction add, which means one of its operands is a phi.
// To match SAD, we need the other operand to be a vector select.
- SDValue SelectOp, Phi;
- if (Op0.getOpcode() == ISD::VSELECT) {
- SelectOp = Op0;
- Phi = Op1;
- } else if (Op1.getOpcode() == ISD::VSELECT) {
- SelectOp = Op1;
- Phi = Op0;
- } else
- return SDValue();
+ if (Op0.getOpcode() != ISD::VSELECT)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) {
+ // SAD pattern detected. Now build a SAD instruction and an addition for
+ // reduction. Note that the number of elements of the result of SAD is less
+ // than the number of elements of its input. Therefore, we can only update
+ // part of the elements in the reduction vector.
+ SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget);
+
+ // The output of PSADBW is a vector of i64.
+ // We need to turn the vector of i64 into a vector of i32.
+ // If the reduction vector is at least as wide as the psadbw result, just
+ // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
+ // anyway.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
+ if (VT.getSizeInBits() >= ResVT.getSizeInBits())
+ Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
+ else
+ Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
+
+ if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
+ // Fill the upper elements with zero to match the add width.
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
+ return Sad;
+ };
// Check whether we have an abs-diff pattern feeding into the select.
- if(!detectZextAbsDiff(SelectOp, Op0, Op1))
- return SDValue();
-
- // SAD pattern detected. Now build a SAD instruction and an addition for
- // reduction. Note that the number of elements of the result of SAD is less
- // than the number of elements of its input. Therefore, we could only update
- // part of elements in the reduction vector.
- SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget);
-
- // The output of PSADBW is a vector of i64.
- // We need to turn the vector of i64 into a vector of i32.
- // If the reduction vector is at least as wide as the psadbw result, just
- // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
- // anyway.
- MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
- if (VT.getSizeInBits() >= ResVT.getSizeInBits())
- Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
- else
- Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
+ SDValue SadOp0, SadOp1;
+ if (!detectZextAbsDiff(Op0, SadOp0, SadOp1))
+ return SDValue();
- if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
- // Fill the upper elements with zero to match the add width.
- SDValue Zero = DAG.getConstant(0, DL, VT);
- Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad,
- DAG.getIntPtrConstant(0, DL));
+ Op0 = BuildPSADBW(SadOp0, SadOp1);
+
+ // It's possible we have a sad on the other side too.
+ if (Op1.getOpcode() == ISD::VSELECT &&
+ detectZextAbsDiff(Op1, SadOp0, SadOp1)) {
+ Op1 = BuildPSADBW(SadOp0, SadOp1);
}
- return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi);
+ return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
}
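BuildPSADBW above depends on PSADBW semantics: each 64-bit lane holds the sum of absolute differences of eight unsigned bytes, so the result is at most 8 * 255 and the high bits of the i64 are zero, which is why the bitcast/truncate to i32 lanes is safe. A scalar model of one lane (plain C++, illustrative name):

#include <cassert>
#include <cstdint>

// One PSADBW lane: sum |A[i] - B[i]| over 8 unsigned bytes; fits in 16 bits.
uint64_t psadbwLane(const uint8_t A[8], const uint8_t B[8]) {
  uint64_t Sum = 0;
  for (int I = 0; I < 8; ++I)
    Sum += A[I] > B[I] ? A[I] - B[I] : B[I] - A[I];
  return Sum;
}

int main() {
  uint8_t A[8] = {0, 10, 20, 255, 1, 2, 3, 4};
  uint8_t B[8] = {5, 10, 25, 0, 2, 1, 3, 0};
  assert(psadbwLane(A, B) == 5 + 0 + 5 + 255 + 1 + 1 + 0 + 4);
  return 0;
}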
/// Convert vector increment or decrement to sub/add with an all-ones constant:
@@ -38843,10 +40697,8 @@ static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
- SDNode *N1 = N->getOperand(1).getNode();
APInt SplatVal;
- if (!ISD::isConstantSplatVector(N1, SplatVal) ||
- !SplatVal.isOneValue())
+ if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue())
return SDValue();
SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
@@ -38963,6 +40815,39 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
PMADDBuilder);
}
+// Try to turn (add (umax X, C), -C) into (psubus X, C)
+static SDValue combineAddToSUBUS(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasSSE2())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // psubus is available in SSE2 for i8 and i16 vectors.
+ if (!VT.isVector() || VT.getVectorNumElements() < 2 ||
+ !isPowerOf2_32(VT.getVectorNumElements()) ||
+ !(VT.getVectorElementType() == MVT::i8 ||
+ VT.getVectorElementType() == MVT::i16))
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() != ISD::UMAX)
+ return SDValue();
+
+ // The add should have a constant that is the negative of the max.
+ // TODO: Handle build_vectors with undef elements.
+ auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
+ return Max->getAPIntValue() == (-Op->getAPIntValue());
+ };
+ if (!ISD::matchBinaryPredicate(Op0.getOperand(1), Op1, MatchUSUBSAT))
+ return SDValue();
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, Op0.getOperand(0),
+ Op0.getOperand(1));
+}
+
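combineAddToSUBUS above uses the identity umax(X, C) + (-C) == usubsat(X, C): if X >= C the sum is X - C, otherwise it is C - C == 0, which is exactly the unsigned saturating subtraction that PSUBUS implements. A scalar sketch over i16 (plain C++, illustrative names):

#include <algorithm>
#include <cassert>
#include <cstdint>

// add (umax X, C), -C  ==  X -sat C  (unsigned saturating subtract).
uint16_t viaUMaxPlusNegC(uint16_t X, uint16_t C) {
  return uint16_t(std::max(X, C) + uint16_t(-C));  // the matched DAG pattern
}
uint16_t usubsat(uint16_t X, uint16_t C) {
  return X > C ? uint16_t(X - C) : uint16_t(0);
}

int main() {
  for (uint32_t X : {0u, 1u, 99u, 100u, 101u, 65535u})
    for (uint32_t C : {0u, 1u, 100u, 65535u})
      assert(viaUMaxPlusNegC(uint16_t(X), uint16_t(C)) ==
             usubsat(uint16_t(X), uint16_t(C)));
  return 0;
}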
// Attempt to turn this pattern into PMADDWD.
// (mul (add (zext (build_vector)), (zext (build_vector))),
// (add (zext (build_vector)), (zext (build_vector)))
@@ -39105,7 +40990,8 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal adds from adds of shuffles.
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
- Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true)) {
+ Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true) &&
+ shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops);
@@ -39117,6 +41003,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineIncDecVector(N, DAG))
return V;
+ if (SDValue V = combineAddToSUBUS(N, DAG, Subtarget))
+ return V;
+
return combineAddOrSubToADCOrSBB(N, DAG);
}
@@ -39162,23 +41051,22 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
} else
return SDValue();
- auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
+ auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops);
};
// PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with
// special preprocessing in some cases.
if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64)
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
- { SubusLHS, SubusRHS }, SUBUSBuilder);
+ { SubusLHS, SubusRHS }, USUBSATBuilder);
// Special preprocessing case can be only applied
// if the value was zero extended from 16 bit,
// so we require first 16 bits to be zeros for 32 bit
// values, or first 48 bits for 64 bit values.
- KnownBits Known;
- DAG.computeKnownBits(SubusLHS, Known);
+ KnownBits Known = DAG.computeKnownBits(SubusLHS);
unsigned NumZeros = Known.countMinLeadingZeros();
if ((VT == MVT::v8i64 && NumZeros < 48) || NumZeros < 16)
return SDValue();
@@ -39203,7 +41091,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType);
SDValue Psubus =
SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType,
- { NewSubusLHS, NewSubusRHS }, SUBUSBuilder);
+ { NewSubusLHS, NewSubusRHS }, USUBSATBuilder);
// Zero extend the result, it may be used somewhere as 32 bit,
// if not zext and following trunc will shrink.
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType);
@@ -39236,7 +41124,8 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
- Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false)) {
+ Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false) &&
+ shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops);
@@ -39255,98 +41144,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
return combineAddOrSubToADCOrSBB(N, DAG);
}
-static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- SDLoc DL(N);
- unsigned Opcode = N->getOpcode();
- MVT VT = N->getSimpleValueType(0);
- MVT SVT = VT.getVectorElementType();
- unsigned NumElts = VT.getVectorNumElements();
- unsigned EltSizeInBits = SVT.getSizeInBits();
-
- SDValue Op = N->getOperand(0);
- MVT OpVT = Op.getSimpleValueType();
- MVT OpEltVT = OpVT.getVectorElementType();
- unsigned OpEltSizeInBits = OpEltVT.getSizeInBits();
- unsigned InputBits = OpEltSizeInBits * NumElts;
-
- // Perform any constant folding.
- // FIXME: Reduce constant pool usage and don't fold when OptSize is enabled.
- APInt UndefElts;
- SmallVector<APInt, 64> EltBits;
- if (getTargetConstantBitsFromNode(Op, OpEltSizeInBits, UndefElts, EltBits)) {
- APInt Undefs(NumElts, 0);
- SmallVector<APInt, 4> Vals(NumElts, APInt(EltSizeInBits, 0));
- bool IsZEXT =
- (Opcode == X86ISD::VZEXT) || (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG);
- for (unsigned i = 0; i != NumElts; ++i) {
- if (UndefElts[i]) {
- Undefs.setBit(i);
- continue;
- }
- Vals[i] = IsZEXT ? EltBits[i].zextOrTrunc(EltSizeInBits)
- : EltBits[i].sextOrTrunc(EltSizeInBits);
- }
- return getConstVector(Vals, Undefs, VT, DAG, DL);
- }
-
- // (vzext (bitcast (vzext (x)) -> (vzext x)
- // TODO: (vsext (bitcast (vsext (x)) -> (vsext x)
- SDValue V = peekThroughBitcasts(Op);
- if (Opcode == X86ISD::VZEXT && V != Op && V.getOpcode() == X86ISD::VZEXT) {
- MVT InnerVT = V.getSimpleValueType();
- MVT InnerEltVT = InnerVT.getVectorElementType();
-
- // If the element sizes match exactly, we can just do one larger vzext. This
- // is always an exact type match as vzext operates on integer types.
- if (OpEltVT == InnerEltVT) {
- assert(OpVT == InnerVT && "Types must match for vzext!");
- return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0));
- }
-
- // The only other way we can combine them is if only a single element of the
- // inner vzext is used in the input to the outer vzext.
- if (InnerEltVT.getSizeInBits() < InputBits)
- return SDValue();
-
- // In this case, the inner vzext is completely dead because we're going to
- // only look at bits inside of the low element. Just do the outer vzext on
- // a bitcast of the input to the inner.
- return DAG.getNode(X86ISD::VZEXT, DL, VT, DAG.getBitcast(OpVT, V));
- }
-
- // Check if we can bypass extracting and re-inserting an element of an input
- // vector. Essentially:
- // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
- // TODO: Add X86ISD::VSEXT support
- if (Opcode == X86ISD::VZEXT &&
- V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
- SDValue ExtractedV = V.getOperand(0);
- SDValue OrigV = ExtractedV.getOperand(0);
- if (isNullConstant(ExtractedV.getOperand(1))) {
- MVT OrigVT = OrigV.getSimpleValueType();
- // Extract a subvector if necessary...
- if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
- int Ratio = OrigVT.getSizeInBits() / OpVT.getSizeInBits();
- OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(),
- OrigVT.getVectorNumElements() / Ratio);
- OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
- DAG.getIntPtrConstant(0, DL));
- }
- Op = DAG.getBitcast(OpVT, OrigV);
- return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
- }
- }
-
- return SDValue();
-}
-
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
@@ -39354,9 +41151,9 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
if (N->getOperand(0) == N->getOperand(1)) {
if (N->getOpcode() == X86ISD::PCMPEQ)
- return getOnesVector(VT, DAG, DL);
+ return DAG.getConstant(-1, DL, VT);
if (N->getOpcode() == X86ISD::PCMPGT)
- return getZeroVector(VT, Subtarget, DAG, DL);
+ return DAG.getConstant(0, DL, VT);
}
return SDValue();
@@ -39487,11 +41284,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
return Ld;
}
}
- // If lower/upper loads are the same and the only users of the load, then
- // lower to a VBROADCASTF128/VBROADCASTI128/etc.
+ // If lower/upper loads are the same and there's no other use of the lower
+ // load, then splat the loaded value with a broadcast.
if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2)))
- if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
- SDNode::areOnlyUsersOf({N, Vec.getNode()}, SubVec2.getNode()))
+ if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && Vec.hasOneUse())
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
// If this is subv_broadcast insert into both halves, use a larger
@@ -39528,6 +41324,39 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ // For AVX1 only, if we are extracting from a 256-bit and+not (which will
+ // eventually get combined/lowered into ANDNP) with a concatenated operand,
+ // split the 'and' into 128-bit ops to avoid the concatenate and extract.
+ // We let generic combining take over from there to simplify the
+ // insert/extract and 'not'.
+ // This pattern emerges during AVX1 legalization. We handle it before lowering
+ // to avoid complications like splitting constant vector loads.
+
+ // Capture the original wide type in the likely case that we need to bitcast
+ // back to this type.
+ EVT VT = N->getValueType(0);
+ EVT WideVecVT = N->getOperand(0).getValueType();
+ SDValue WideVec = peekThroughBitcasts(N->getOperand(0));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (Subtarget.hasAVX() && !Subtarget.hasAVX2() &&
+ TLI.isTypeLegal(WideVecVT) &&
+ WideVecVT.getSizeInBits() == 256 && WideVec.getOpcode() == ISD::AND) {
+ auto isConcatenatedNot = [] (SDValue V) {
+ V = peekThroughBitcasts(V);
+ if (!isBitwiseNot(V))
+ return false;
+ SDValue NotOp = V->getOperand(0);
+ return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS;
+ };
+ if (isConcatenatedNot(WideVec.getOperand(0)) ||
+ isConcatenatedNot(WideVec.getOperand(1))) {
+ // extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1
+ SDValue Concat = split256IntArith(WideVec, DAG);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
+ DAG.getBitcast(WideVecVT, Concat), N->getOperand(1));
+ }
+ }
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -39565,13 +41394,32 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), OpVT, InVec.getOperand(0));
}
}
- if ((InOpcode == X86ISD::VZEXT || InOpcode == X86ISD::VSEXT) &&
+ if ((InOpcode == ISD::ZERO_EXTEND || InOpcode == ISD::SIGN_EXTEND) &&
OpVT.is128BitVector() &&
InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
- unsigned ExtOp = InOpcode == X86ISD::VZEXT ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG;
+ unsigned ExtOp =
+ InOpcode == ISD::ZERO_EXTEND ? ISD::ZERO_EXTEND_VECTOR_INREG
+ : ISD::SIGN_EXTEND_VECTOR_INREG;
return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0));
}
+ if ((InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) &&
+ OpVT.is128BitVector() &&
+ InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
+ return DAG.getNode(InOpcode, SDLoc(N), OpVT, InVec.getOperand(0));
+ }
+ if (InOpcode == ISD::BITCAST) {
+ // TODO - do this for target shuffles in general.
+ SDValue InVecBC = peekThroughOneUseBitcasts(InVec);
+ if (InVecBC.getOpcode() == X86ISD::PSHUFB && OpVT.is128BitVector()) {
+ SDLoc DL(N);
+ SDValue SubPSHUFB =
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
+ extract128BitVector(InVecBC.getOperand(0), 0, DAG, DL),
+ extract128BitVector(InVecBC.getOperand(1), 0, DAG, DL));
+ return DAG.getBitcast(OpVT, SubPSHUFB);
+ }
+ }
}
return SDValue();
@@ -39591,6 +41439,15 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
Src.getOperand(0));
+ // Combine scalar_to_vector of an extract_vector_elt into an extract_subvec.
+ if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
+ Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+ if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
+ if (C->isNullValue())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
+ Src.getOperand(0), Src.getOperand(1));
+
return SDValue();
}
@@ -39600,23 +41457,28 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ // Canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(RHS))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS);
+
+ // Multiply by zero.
+ if (ISD::isBuildVectorAllZeros(RHS.getNode()))
+ return RHS;
+
+ // Aggressively peek through ops to get at the demanded low bits.
+ APInt DemandedMask = APInt::getLowBitsSet(64, 32);
+ SDValue DemandedLHS = DAG.GetDemandedBits(LHS, DemandedMask);
+ SDValue DemandedRHS = DAG.GetDemandedBits(RHS, DemandedMask);
+ if (DemandedLHS || DemandedRHS)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ DemandedLHS ? DemandedLHS : LHS,
+ DemandedRHS ? DemandedRHS : RHS);
+
+ // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- APInt DemandedMask(APInt::getLowBitsSet(64, 32));
-
- // PMULQDQ/PMULUDQ only uses lower 32 bits from each vector element.
- KnownBits LHSKnown;
- if (TLI.SimplifyDemandedBits(LHS, DemandedMask, LHSKnown, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
return SDValue(N, 0);
- }
-
- KnownBits RHSKnown;
- if (TLI.SimplifyDemandedBits(RHS, DemandedMask, RHSKnown, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
- return SDValue(N, 0);
- }
return SDValue();
}
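The rewritten combinePMULDQ canonicalizes a constant operand to the RHS, folds multiply-by-zero, and aggressively simplifies the operands because PMULDQ/PMULUDQ only read the low 32 bits of each 64-bit element. A scalar model of one element of each (plain C++, illustrative names):

#include <cassert>
#include <cstdint>

// PMULDQ: sign-extend the low 32 bits of each 64-bit element, then multiply.
int64_t pmuldqLane(uint64_t A, uint64_t B) {
  return int64_t(int32_t(uint32_t(A))) * int64_t(int32_t(uint32_t(B)));
}
// PMULUDQ: the same with zero-extension.
uint64_t pmuludqLane(uint64_t A, uint64_t B) {
  return uint64_t(uint32_t(A)) * uint64_t(uint32_t(B));
}

int main() {
  // The high halves of the inputs are ignored, which is what lets the combine
  // call GetDemandedBits / SimplifyDemandedBits on the operands.
  assert(pmuludqLane(0xdeadbeef00000003ULL, 0x1234567800000005ULL) == 15);
  assert(pmuldqLane(0x00000000ffffffffULL, 2) == -2);
  return 0;
}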
@@ -39638,9 +41500,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return combineExtractSubvector(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
- case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
+ case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
+ case X86ISD::CMP: return combineCMP(N, DAG);
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
case ISD::SUB: return combineSub(N, DAG, Subtarget);
case X86ISD::SBB: return combineSBB(N, DAG);
@@ -39656,7 +41519,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, Subtarget);
- case ISD::MSTORE: return combineMaskedStore(N, DAG, Subtarget);
+ case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
@@ -39672,6 +41535,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
+ case X86ISD::CVTSI2P:
+ case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
case X86ISD::BT: return combineBT(N, DAG, DCI);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
@@ -39682,14 +41547,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::PACKSS:
case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
+ case X86ISD::VSHL:
+ case X86ISD::VSRA:
+ case X86ISD::VSRL:
+ return combineVectorShiftVar(N, DAG, DCI, Subtarget);
case X86ISD::VSHLI:
case X86ISD::VSRAI:
case X86ISD::VSRLI:
return combineVectorShiftImm(N, DAG, DCI, Subtarget);
- case ISD::SIGN_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND_VECTOR_INREG:
- case X86ISD::VSEXT:
- case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
case X86ISD::PINSRB:
case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
@@ -39751,10 +41616,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
-/// Return true if the target has native support for the specified value type
-/// and it is 'desirable' to use the type for the given node type. e.g. On x86
-/// i16 is legal, but undesirable since i16 instruction encodings are longer and
-/// some i16 instructions are slow.
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (!isTypeLegal(VT))
return false;
@@ -39763,26 +41624,37 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
return false;
- if (VT != MVT::i16)
- return true;
-
- switch (Opc) {
- default:
- return true;
- case ISD::LOAD:
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- case ISD::SHL:
- case ISD::SRL:
- case ISD::SUB:
- case ISD::ADD:
- case ISD::MUL:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
+ // 8-bit multiply is probably not much cheaper than 32-bit multiply, and
+ // we have specializations to turn 32-bit multiply into LEA or other ops.
+ // Also, see the comment in "IsDesirableToPromoteOp" - where we additionally
+ // check for a constant operand to the multiply.
+ if (Opc == ISD::MUL && VT == MVT::i8)
return false;
+
+ // i16 instruction encodings are longer and some i16 instructions are slow,
+ // so those are not desirable.
+ if (VT == MVT::i16) {
+ switch (Opc) {
+ default:
+ break;
+ case ISD::LOAD:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SUB:
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return false;
+ }
}
+
+ // Any legal type not explicitly accounted for above here is desirable.
+ return true;
}
SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
@@ -39801,12 +41673,16 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
}
-/// This method query the target whether it is beneficial for dag combiner to
-/// promote the specified node. If true, it should return the desired promotion
-/// type by reference.
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
- if (VT != MVT::i16)
+ bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
+ isa<ConstantSDNode>(Op.getOperand(1));
+
+ // i16 is legal, but undesirable since i16 instruction encodings are longer
+ // and some i16 instructions are slow.
+ // 8-bit multiply-by-constant can usually be expanded to something cheaper
+ // using LEA and/or other ALU ops.
+ if (VT != MVT::i16 && !Is8BitMulByConstant)
return false;
auto IsFoldableRMW = [](SDValue Load, SDValue Op) {
@@ -39820,6 +41696,19 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
return Ld->getBasePtr() == St->getBasePtr();
};
+ auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) {
+ if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD)
+ return false;
+ if (!Op.hasOneUse())
+ return false;
+ SDNode *User = *Op->use_begin();
+ if (User->getOpcode() != ISD::ATOMIC_STORE)
+ return false;
+ auto *Ld = cast<AtomicSDNode>(Load);
+ auto *St = cast<AtomicSDNode>(User);
+ return Ld->getBasePtr() == St->getBasePtr();
+ };
+
bool Commute = false;
switch (Op.getOpcode()) {
default: return false;
@@ -39854,6 +41743,9 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
((Commute && !isa<ConstantSDNode>(N1)) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
return false;
+ if (IsFoldableAtomicRMW(N0, Op) ||
+ (Commute && IsFoldableAtomicRMW(N1, Op)))
+ return false;
}
}
@@ -40593,44 +42485,33 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (!Res.second) {
// Map st(0) -> st(7) -> ST0
if (Constraint.size() == 7 && Constraint[0] == '{' &&
- tolower(Constraint[1]) == 's' &&
- tolower(Constraint[2]) == 't' &&
+ tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
Constraint[3] == '(' &&
(Constraint[4] >= '0' && Constraint[4] <= '7') &&
- Constraint[5] == ')' &&
- Constraint[6] == '}') {
-
- Res.first = X86::FP0+Constraint[4]-'0';
- Res.second = &X86::RFP80RegClass;
- return Res;
+ Constraint[5] == ')' && Constraint[6] == '}') {
+ // st(7) is not allocatable and thus not a member of RFP80. Return
+ // singleton class in cases where we have a reference to it.
+ if (Constraint[4] == '7')
+ return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
+ return std::make_pair(X86::FP0 + Constraint[4] - '0',
+ &X86::RFP80RegClass);
}
// GCC allows "st(0)" to be called just plain "st".
- if (StringRef("{st}").equals_lower(Constraint)) {
- Res.first = X86::FP0;
- Res.second = &X86::RFP80RegClass;
- return Res;
- }
+ if (StringRef("{st}").equals_lower(Constraint))
+ return std::make_pair(X86::FP0, &X86::RFP80RegClass);
// flags -> EFLAGS
- if (StringRef("{flags}").equals_lower(Constraint)) {
- Res.first = X86::EFLAGS;
- Res.second = &X86::CCRRegClass;
- return Res;
- }
+ if (StringRef("{flags}").equals_lower(Constraint))
+ return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);
// 'A' means [ER]AX + [ER]DX.
if (Constraint == "A") {
- if (Subtarget.is64Bit()) {
- Res.first = X86::RAX;
- Res.second = &X86::GR64_ADRegClass;
- } else {
- assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
- "Expecting 64, 32 or 16 bit subtarget");
- Res.first = X86::EAX;
- Res.second = &X86::GR32_ADRegClass;
- }
- return Res;
+ if (Subtarget.is64Bit())
+ return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
+ assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
+ "Expecting 64, 32 or 16 bit subtarget");
+ return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
}
return Res;
}
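For context on the 'A' constraint handling above: GCC's "A" constraint names the AX/DX register pair, which the code maps to the GR64_AD class on 64-bit targets and the GR32_AD class on 32-bit ones. A typical (hypothetical, 32-bit-only) use with GNU-style inline asm, where the EDX:EAX pair produced by RDTSC binds directly to a 64-bit output:

#include <cstdint>

// Illustrative sketch for a 32-bit x86 target only: "=A" binds the EDX:EAX
// pair, so RDTSC's 64-bit result can be captured in one output operand.
uint64_t readTSC32() {
  uint64_t TSC;
  asm volatile("rdtsc" : "=A"(TSC));
  return TSC;
}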
@@ -40640,18 +42521,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
(isFRClass(*Res.second) || isGRClass(*Res.second)) &&
TRI->getEncodingValue(Res.first) >= 8) {
// Register requires REX prefix, but we're in 32-bit mode.
- Res.first = 0;
- Res.second = nullptr;
- return Res;
+ return std::make_pair(0, nullptr);
}
// Make sure it isn't a register that requires AVX512.
if (!Subtarget.hasAVX512() && isFRClass(*Res.second) &&
TRI->getEncodingValue(Res.first) & 0x10) {
// Register requires EVEX prefix.
- Res.first = 0;
- Res.second = nullptr;
- return Res;
+ return std::make_pair(0, nullptr);
}
// Otherwise, check to see if this is a register class of the wrong value
@@ -40679,14 +42556,36 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
: Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
: Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
- : &X86::GR64RegClass;
- if (RC->contains(DestReg))
- Res = std::make_pair(DestReg, RC);
- } else {
- // No register found/type mismatch.
- Res.first = 0;
- Res.second = nullptr;
+ : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr)
+ : nullptr;
+ if (Size == 64 && !is64Bit) {
+ // Model GCC's behavior here and select a fixed pair of 32-bit
+ // registers.
+ switch (Res.first) {
+ case X86::EAX:
+ return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
+ case X86::EDX:
+ return std::make_pair(X86::EDX, &X86::GR32_DCRegClass);
+ case X86::ECX:
+ return std::make_pair(X86::ECX, &X86::GR32_CBRegClass);
+ case X86::EBX:
+ return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass);
+ case X86::ESI:
+ return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass);
+ case X86::EDI:
+ return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass);
+ case X86::EBP:
+ return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass);
+ default:
+ return std::make_pair(0, nullptr);
+ }
+ }
+ if (RC && RC->contains(DestReg))
+ return std::make_pair(DestReg, RC);
+ return Res;
}
+ // No register found/type mismatch.
+ return std::make_pair(0, nullptr);
} else if (isFRClass(*Class)) {
// Handle references to XMM physical registers that got mapped into the
// wrong class. This can happen with constraints like {xmm0} where the
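When a constraint names a single 32-bit register but the operand is 64 bits wide and the subtarget is not 64-bit, the switch added above mirrors GCC and hands back a fixed two-register class; the class names suggest the pairs (GR32_AD for eax:edx, GR32_DC for edx:ecx, and so on). A hypothetical helper, not part of the patch, restating that mapping:

#include <string>
#include <utility>

// Pair picked for a 64-bit operand bound to one 32-bit register in 32-bit mode;
// anything else is rejected, matching the std::make_pair(0, nullptr) fallthrough.
static std::pair<std::string, std::string> gccStylePair(const std::string &Reg) {
  if (Reg == "eax") return {"eax", "edx"};  // GR32_AD
  if (Reg == "edx") return {"edx", "ecx"};  // GR32_DC
  if (Reg == "ecx") return {"ecx", "ebx"};  // GR32_CB
  if (Reg == "ebx") return {"ebx", "esi"};  // GR32_BSI
  if (Reg == "esi") return {"esi", "edi"};  // GR32_SIDI
  if (Reg == "edi") return {"edi", "ebp"};  // GR32_DIBP
  if (Reg == "ebp") return {"ebp", "esp"};  // GR32_BPSP
  return {"", ""};
}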
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index ff5006d208e5..910acd80e8b8 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -98,7 +98,7 @@ namespace llvm {
SETCC,
/// X86 Select
- SELECT, SELECTS,
+ SELECTS,
// Same as SETCC except it's materialized with a sbb and the value is all
// one's or all zero's.
@@ -203,8 +203,9 @@ namespace llvm {
/// Dynamic (non-constant condition) vector blend where only the sign bits
/// of the condition elements are used. This is used to enforce that the
- /// condition mask is not valid for generic VSELECT optimizations.
- SHRUNKBLEND,
+ /// condition mask is not valid for generic VSELECT optimizations. This
+ /// can also be used to implement the BLENDV intrinsics.
+ BLENDV,
/// Combined add and sub on an FP vector.
ADDSUB,
@@ -226,14 +227,6 @@ namespace llvm {
SCALEF,
SCALEFS,
- // Integer add/sub with unsigned saturation.
- ADDUS,
- SUBUS,
-
- // Integer add/sub with signed saturation.
- ADDS,
- SUBS,
-
// Unsigned Integer average.
AVG,
@@ -295,22 +288,27 @@ namespace llvm {
// Vector move to low scalar and zero higher vector elements.
VZEXT_MOVL,
- // Vector integer zero-extend.
- VZEXT,
- // Vector integer signed-extend.
- VSEXT,
-
// Vector integer truncate.
VTRUNC,
// Vector integer truncate with unsigned/signed saturation.
VTRUNCUS, VTRUNCS,
+ // Masked version of the above. Used when less than a 128-bit result is
+ // produced since the mask only applies to the lower elements and can't
+ // be represented by a select.
+ // SRC, PASSTHRU, MASK
+ VMTRUNC, VMTRUNCUS, VMTRUNCS,
+
// Vector FP extend.
VFPEXT, VFPEXT_RND, VFPEXTS_RND,
// Vector FP round.
VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
+ // Masked version of above. Used for v2f64->v4f32.
+ // SRC, PASSTHRU, MASK
+ VMFPROUND,
+
// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -349,21 +347,14 @@ namespace llvm {
CMPM_RND,
// Arithmetic operations with FLAGS results.
- ADD, SUB, ADC, SBB, SMUL,
- INC, DEC, OR, XOR, AND,
+ ADD, SUB, ADC, SBB, SMUL, UMUL,
+ OR, XOR, AND,
// Bit field extract.
BEXTR,
- // LOW, HI, FLAGS = umul LHS, RHS.
- UMUL,
-
- // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
- SMUL8, UMUL8,
-
- // 8-bit divrem that zero-extend the high result (AH).
- UDIVREM8_ZEXT_HREG,
- SDIVREM8_SEXT_HREG,
+ // Zero High Bits Starting with Specified Bit Position.
+ BZHI,
// X86-specific multiply by immediate.
MUL_IMM,
@@ -513,16 +504,20 @@ namespace llvm {
// Vector float/double to signed/unsigned integer.
CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
// Scalar float/double to signed/unsigned integer.
- CVTS2SI_RND, CVTS2UI_RND,
+ CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
// Vector float/double to signed/unsigned integer with truncation.
CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
// Scalar float/double to signed/unsigned integer with truncation.
- CVTTS2SI_RND, CVTTS2UI_RND,
+ CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
// Vector signed/unsigned integer to float/double.
CVTSI2P, CVTUI2P,
+ // Masked versions of above. Used for v2f64->v4f32.
+ // SRC, PASSTHRU, MASK
+ MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
+
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
@@ -570,6 +565,10 @@ namespace llvm {
// Conversions between float and half-float.
CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
+ // Masked version of above.
+ // SRC, RND, PASSTHRU, MASK
+ MCVTPS2PH,
+
// Galois Field Arithmetic Instructions
GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
@@ -588,7 +587,7 @@ namespace llvm {
/// LOCK-prefixed arithmetic read-modify-write instructions.
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
- LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
+ LADD, LSUB, LOR, LXOR, LAND,
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
@@ -833,6 +832,8 @@ namespace llvm {
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
}
+ bool shouldSplatInsEltVarIndex(EVT VT) const override;
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
@@ -866,10 +867,21 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth) const override;
- SDValue unwrapAddress(SDValue N) const override;
+ bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &KnownUndef,
+ APInt &KnownZero,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const override;
- bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
- int64_t &Offset) const override;
+ bool SimplifyDemandedBitsForTargetNode(SDValue Op,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const override;
+
+ SDValue unwrapAddress(SDValue N) const override;
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
@@ -932,6 +944,8 @@ namespace llvm {
/// the immediate into a register.
bool isLegalAddImmediate(int64_t Imm) const override;
+ bool isLegalStoreImmediate(int64_t Imm) const override;
+
/// Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
@@ -1030,13 +1044,25 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
+
bool convertSelectOfConstantsToMath(EVT VT) const override;
+ bool decomposeMulByConstant(EVT VT, SDValue C) const override;
+
+ bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+ bool IsSigned) const override;
+
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
+ /// Scalar ops always have equal or better analysis/performance/power than
+ /// the vector equivalent, so this always makes sense if the scalar op is
+ /// supported.
+ bool shouldScalarizeBinop(SDValue) const override;
+
bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
unsigned AddrSpace) const override {
// If we can replace more than 2 scalar stores, there will be a reduction
@@ -1095,7 +1121,7 @@ namespace llvm {
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
/// Customize the preferred legalization strategy for certain types.
- LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
+ LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;
@@ -1347,11 +1373,6 @@ namespace llvm {
MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- /// Emit nodes that will be selected as "test Op0,Op0", or something
- /// equivalent, for use with the given x86 condition code.
- SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
- SelectionDAG &DAG) const;
-
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
@@ -1360,6 +1381,13 @@ namespace llvm {
/// Convert a comparison if required by the subtarget.
SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
+ /// Emit flags for the given setcc condition and operands. Also returns the
+ /// corresponding X86 condition code constant in X86CC.
+ SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
+ ISD::CondCode CC, const SDLoc &dl,
+ SelectionDAG &DAG,
+ SDValue &X86CC) const;
+
/// Check if replacement of SQRT with RSQRT should be disabled.
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
@@ -1407,9 +1435,9 @@ namespace llvm {
MachineMemOperand *MMO)
: MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
- const SDValue &getBasePtr() const { return getOperand(1); }
- const SDValue &getMask() const { return getOperand(2); }
- const SDValue &getValue() const { return getOperand(3); }
+ const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getBasePtr() const { return getOperand(2); }
+ const SDValue &getMask() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
@@ -1480,7 +1508,6 @@ namespace llvm {
const SDValue &getBasePtr() const { return getOperand(3); }
const SDValue &getIndex() const { return getOperand(4); }
const SDValue &getMask() const { return getOperand(2); }
- const SDValue &getValue() const { return getOperand(1); }
const SDValue &getScale() const { return getOperand(5); }
static bool classof(const SDNode *N) {
@@ -1496,6 +1523,8 @@ namespace llvm {
: X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
MMO) {}
+ const SDValue &getPassThru() const { return getOperand(1); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::MGATHER;
}
@@ -1508,6 +1537,8 @@ namespace llvm {
: X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
MMO) {}
+ const SDValue &getValue() const { return getOperand(1); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::MSCATTER;
}
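The header diff above documents a SRC, PASSTHRU, MASK operand order for the new masked truncate/convert nodes and reorders the truncating-store accessors so the stored value is operand 1. A minimal fragment that only illustrates that operand order, assuming the usual SelectionDAG lowering context with X86ISelLowering.h available for the X86ISD enum (buildMaskedTrunc is a made-up helper):

#include "llvm/CodeGen/SelectionDAG.h"

// Builds an X86ISD::VMTRUNC node with the documented SRC, PASSTHRU, MASK order.
static llvm::SDValue buildMaskedTrunc(llvm::SelectionDAG &DAG, const llvm::SDLoc &dl,
                                      llvm::EVT VT, llvm::SDValue Src,
                                      llvm::SDValue PassThru, llvm::SDValue Mask) {
  return DAG.getNode(llvm::X86ISD::VMTRUNC, dl, VT, Src, PassThru, Mask);
}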
diff --git a/contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp
new file mode 100644
index 000000000000..30b46a09ef0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -0,0 +1,253 @@
+//===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass applies cache prefetch instructions based on a profile. The pass
+// assumes DiscriminateMemOps ran immediately before, to ensure debug info
+// matches the one used at profile generation time. The profile is encoded in
+// afdo format (text or binary). It contains prefetch hint recommendations.
+// Each recommendation is made in terms of debug info locations, a type (i.e.
+// nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a
+// memory operand (see X86DiscriminateMemOps). The prefetch will be made for
+// a location at that memory operand + the delta specified in the
+// recommendation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
+using namespace llvm;
+using namespace sampleprof;
+
+static cl::opt<std::string>
+ PrefetchHintsFile("prefetch-hints-file",
+ cl::desc("Path to the prefetch hints profile."),
+ cl::Hidden);
+namespace {
+
+class X86InsertPrefetch : public MachineFunctionPass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool doInitialization(Module &) override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ struct PrefetchInfo {
+ unsigned InstructionID;
+ int64_t Delta;
+ };
+ typedef SmallVectorImpl<PrefetchInfo> Prefetches;
+ bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI,
+ Prefetches &prefetches) const;
+
+public:
+ static char ID;
+ X86InsertPrefetch(const std::string &PrefetchHintsFilename);
+ StringRef getPassName() const override {
+ return "X86 Insert Cache Prefetches";
+ }
+
+private:
+ std::string Filename;
+ std::unique_ptr<SampleProfileReader> Reader;
+};
+
+using PrefetchHints = SampleRecord::CallTargetMap;
+
+// Return any prefetching hints for the specified MachineInstruction. The hints
+// are returned as pairs (name, delta).
+ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples,
+ const MachineInstr &MI) {
+ if (const auto &Loc = MI.getDebugLoc())
+ if (const auto *Samples = TopSamples->findFunctionSamples(Loc))
+ return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc),
+ Loc->getBaseDiscriminator());
+ return std::error_code();
+}
+
+// The prefetch instruction can't take memory operands involving vector
+// registers.
+bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) {
+ unsigned BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg();
+ unsigned IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg();
+ return (BaseReg == 0 ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) &&
+ (IndexReg == 0 ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg));
+}
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char X86InsertPrefetch::ID = 0;
+
+X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename)
+ : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {}
+
+/// Return true if the provided MachineInstruction has cache prefetch hints. In
+/// that case, the prefetch hints are stored, in order, in the Prefetches
+/// vector.
+bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
+ const MachineInstr &MI,
+ Prefetches &Prefetches) const {
+ assert(Prefetches.empty() &&
+ "Expected caller passed empty PrefetchInfo vector.");
+ static const std::pair<const StringRef, unsigned> HintTypes[] = {
+ {"_nta_", X86::PREFETCHNTA},
+ {"_t0_", X86::PREFETCHT0},
+ {"_t1_", X86::PREFETCHT1},
+ {"_t2_", X86::PREFETCHT2},
+ };
+ static const char *SerializedPrefetchPrefix = "__prefetch";
+
+ const ErrorOr<PrefetchHints> T = getPrefetchHints(TopSamples, MI);
+ if (!T)
+ return false;
+ int16_t max_index = -1;
+ // Convert serialized prefetch hints into PrefetchInfo objects, and populate
+ // the Prefetches vector.
+ for (const auto &S_V : *T) {
+ StringRef Name = S_V.getKey();
+ if (Name.consume_front(SerializedPrefetchPrefix)) {
+ int64_t D = static_cast<int64_t>(S_V.second);
+ unsigned IID = 0;
+ for (const auto &HintType : HintTypes) {
+ if (Name.startswith(HintType.first)) {
+ Name = Name.drop_front(HintType.first.size());
+ IID = HintType.second;
+ break;
+ }
+ }
+ if (IID == 0)
+ return false;
+ uint8_t index = 0;
+ Name.consumeInteger(10, index);
+
+ if (index >= Prefetches.size())
+ Prefetches.resize(index + 1);
+ Prefetches[index] = {IID, D};
+ max_index = std::max(max_index, static_cast<int16_t>(index));
+ }
+ }
+ assert(max_index + 1 >= 0 &&
+ "Possible overflow: max_index + 1 should be positive.");
+ assert(static_cast<size_t>(max_index + 1) == Prefetches.size() &&
+ "The number of prefetch hints received should match the number of "
+ "PrefetchInfo objects returned");
+ return !Prefetches.empty();
+}
+
+bool X86InsertPrefetch::doInitialization(Module &M) {
+ if (Filename.empty())
+ return false;
+
+ LLVMContext &Ctx = M.getContext();
+ ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr =
+ SampleProfileReader::create(Filename, Ctx);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg,
+ DiagnosticSeverity::DS_Warning));
+ return false;
+ }
+ Reader = std::move(ReaderOrErr.get());
+ Reader->read();
+ return true;
+}
+
+void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+}
+
+bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
+ if (!Reader)
+ return false;
+ const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction());
+ if (!Samples)
+ return false;
+
+ bool Changed = false;
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<PrefetchInfo, 4> Prefetches;
+ for (auto &MBB : MF) {
+ for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) {
+ auto Current = MI;
+ ++MI;
+
+ int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags);
+ if (Offset < 0)
+ continue;
+ unsigned Bias = X86II::getOperandBias(Current->getDesc());
+ int MemOpOffset = Offset + Bias;
+ // FIXME(mtrofin): ORE message when the recommendation cannot be taken.
+ if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset))
+ continue;
+ Prefetches.clear();
+ if (!findPrefetchInfo(Samples, *Current, Prefetches))
+ continue;
+ assert(!Prefetches.empty() &&
+ "The Prefetches vector should contain at least a value if "
+ "findPrefetchInfo returned true.");
+ for (auto &PrefInfo : Prefetches) {
+ unsigned PFetchInstrID = PrefInfo.InstructionID;
+ int64_t Delta = PrefInfo.Delta;
+ const MCInstrDesc &Desc = TII->get(PFetchInstrID);
+ MachineInstr *PFetch =
+ MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, PFetch);
+
+ assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 &&
+ X86::AddrIndexReg == 2 && X86::AddrDisp == 3 &&
+ X86::AddrSegmentReg == 4 &&
+ "Unexpected change in X86 operand offset order.");
+
+ // This assumes X86::AddrBaseReg = 0, {...}ScaleAmt = 1, etc.
+ // FIXME(mtrofin): consider adding a:
+ // MachineInstrBuilder::set(unsigned offset, op).
+ MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg())
+ .addImm(
+ Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm())
+ .addReg(
+ Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg())
+ .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() +
+ Delta)
+ .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg)
+ .getReg());
+
+ if (!Current->memoperands_empty()) {
+ MachineMemOperand *CurrentOp = *(Current->memoperands_begin());
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize()));
+ }
+
+ // Insert before Current. This is because Current may clobber some of
+ // the registers used to describe the input memory operand.
+ MBB.insert(Current, PFetch);
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+FunctionPass *llvm::createX86InsertPrefetchPass() {
+ return new X86InsertPrefetch(PrefetchHintsFile);
+}
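The new pass above is driven by the hidden -prefetch-hints-file option; each hint arrives as a call-target entry whose name packs the prefetch type and an ordering index, and whose count is the byte delta added to the instruction's displacement. A standalone sketch of that naming scheme using the same StringRef calls as findPrefetchInfo (decodeHintName is a hypothetical helper, not part of the pass):

#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <utility>

// Decodes a serialized hint name such as "__prefetch_nta_0" into its type tag and
// the index used to order multiple hints on one instruction; returns an empty tag
// when the name does not follow the "__prefetch<_type_><index>" scheme.
static std::pair<llvm::StringRef, uint64_t> decodeHintName(llvm::StringRef Name) {
  if (!Name.consume_front("__prefetch"))
    return {llvm::StringRef(), 0};
  llvm::StringRef Type;
  for (llvm::StringRef T : {"_nta_", "_t0_", "_t1_", "_t2_"}) {
    if (Name.consume_front(T)) {
      Type = T;
      break;
    }
  }
  if (Type.empty())
    return {llvm::StringRef(), 0};
  uint64_t Index = 0;
  Name.consumeInteger(10, Index); // remaining digits, if any, order the hints
  return {Type, Index};
}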
diff --git a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
index 46dc6bf7661a..49e9e924887a 100644
--- a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
+++ b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -38,7 +38,7 @@ multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
@@ -51,7 +51,7 @@ multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn))
(bitconvert (load_mmx addr:$src))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index 2d95061a8213..7423cb85acd2 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -66,21 +66,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
!if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load patterns
- // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
- // due to load promotion during legalization
- PatFrag LdFrag = !cast<PatFrag>("load" #
- !if (!eq (TypeVariantName, "i"),
- !if (!eq (Size, 128), "v2i64",
- !if (!eq (Size, 256), "v4i64",
- !if (!eq (Size, 512), "v8i64",
- VTName))), VTName));
-
- PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
- !if (!eq (TypeVariantName, "i"),
- !if (!eq (Size, 128), "v2i64",
- !if (!eq (Size, 256), "v4i64",
- !if (!eq (Size, 512), "v8i64",
- VTName))), VTName));
+ PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
+
+ PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
@@ -107,10 +95,6 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
- // A vector tye of the same width with element type i64. This is used to
- // create patterns for logic ops.
- ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
-
// A vector type of the same width with element type i32. This is used to
// create the canonical constant zero node ImmAllZerosV.
ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
@@ -518,13 +502,13 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
- (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm)),
(vinsert_for_mask:$src3 (To.VT To.RC:$src1),
- (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm))>, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<From.EltSize, From.CD8TupleForm>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -547,7 +531,7 @@ multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
def : Pat<(vinsert_insert:$ins
(To.VT To.RC:$src1),
- (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rm")
To.RC:$src1, addr:$src2,
@@ -680,9 +664,7 @@ let Predicates = p in {
(vselect Cast.KRCWM:$mask,
(bitconvert
(vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT
- (bitconvert
- (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm))),
Cast.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#"rmkz")
@@ -783,7 +765,7 @@ def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>,
EVEX_4V, EVEX_CD8<32, CD8VT1>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
//===----------------------------------------------------------------------===//
@@ -1374,7 +1356,7 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(_Dst.VT (X86SubVBroadcast
- (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
+ (_Src.VT (_Src.LdFrag addr:$src))))>,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
@@ -1389,7 +1371,7 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(null_frag),
(_Dst.VT (X86SubVBroadcast
- (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
+ (_Src.VT (_Src.LdFrag addr:$src))))>,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
@@ -1442,11 +1424,11 @@ defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
(VBROADCASTF64X4rm addr:$src)>;
-def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
+def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
(VBROADCASTI64X4rm addr:$src)>;
-def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
+def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
(VBROADCASTI64X4rm addr:$src)>;
-def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
+def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
(VBROADCASTI64X4rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
@@ -1474,9 +1456,9 @@ def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
-def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
-def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
// Patterns for selects of bitcasted operations.
@@ -1506,11 +1488,11 @@ def : Pat<(vselect VK8WM:$mask,
VR512:$src0),
(VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
(bc_v8i64 (v16i32 immAllZerosV))),
(VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
VR512:$src0),
(VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -1527,9 +1509,9 @@ def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Patterns for selects of bitcasted operations.
@@ -1591,11 +1573,11 @@ def : Pat<(vselect VK4WM:$mask,
VR256X:$src0),
(VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
(bc_v4i64 (v8i32 immAllZerosV))),
(VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
VR256X:$src0),
(VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
@@ -1641,11 +1623,11 @@ def : Pat<(vselect VK8WM:$mask,
VR512:$src0),
(VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
(bc_v8i64 (v16i32 immAllZerosV))),
(VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
VR512:$src0),
(VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -1741,8 +1723,8 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
- (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.VT (_.LdFrag addr:$src3)))), 1>,
+ EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -1758,7 +1740,7 @@ multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
(_.VT (X86VPermt2 _.RC:$src2,
IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
@@ -1859,8 +1841,8 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
(ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
- (bitconvert (_.LdFrag addr:$src3)))), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.LdFrag addr:$src3))), 1>,
+ EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
@@ -1874,7 +1856,7 @@ multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
(_.VT (X86VPermt2 _.RC:$src1,
IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
@@ -1955,19 +1937,19 @@ multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
}
}
}
@@ -1980,7 +1962,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
@@ -1988,7 +1970,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
@@ -1996,7 +1978,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2067,7 +2049,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
@@ -2094,7 +2076,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
@@ -2123,7 +2105,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
(_.ScalarLdFrag addr:$src2),
imm:$cc))]>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2149,8 +2131,8 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.VT (_.LdFrag addr:$src2))))]>,
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = IsCommutable in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
@@ -2165,9 +2147,8 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert
- (_.LdFrag addr:$src2))))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.VT (_.LdFrag addr:$src2)))))]>,
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
@@ -2180,7 +2161,7 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
@@ -2192,7 +2173,7 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))]>,
EVEX_4V, EVEX_K, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
@@ -2291,9 +2272,9 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
[(set _.KRC:$dst, (_.KVT
(Frag:$cc
(_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ (_.VT (_.LdFrag addr:$src2)),
cond)))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
@@ -2316,10 +2297,9 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
(_.KVT
(Frag:$cc
(_.VT _.RC:$src1),
- (_.VT (bitconvert
- (_.LdFrag addr:$src2))),
+ (_.VT (_.LdFrag addr:$src2)),
cond))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
@@ -2333,7 +2313,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"), []>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
u8imm:$cc),
@@ -2348,17 +2328,17 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"), []>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
- def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
+ def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
(!cast<Instruction>(Name#_.ZSuffix#"rmi")
_.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
def : Pat<(and _.KRCWM:$mask,
- (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
+ (_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmik")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
@@ -2380,7 +2360,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
cond)))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, AVX512ICC:$cc),
@@ -2393,7 +2373,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
cond))))]>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
@@ -2403,7 +2383,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
@@ -2411,7 +2391,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
@@ -2544,9 +2524,9 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ (_.VT (_.LdFrag addr:$src2)),
imm:$cc)>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@@ -2557,7 +2537,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
imm:$cc)>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
@@ -2573,7 +2553,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
@@ -2582,7 +2562,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"vcmp"#_.Suffix,
"$cc, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $cc">,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
@@ -2694,7 +2674,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set _.KRC:$dst,
(OpNode _.ScalarIntMemCPat:$src1,
(i32 imm:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
@@ -2702,7 +2682,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(OpNode _.ScalarIntMemCPat:$src1,
(i32 imm:$src2))))]>,
- EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2732,17 +2712,17 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode
- (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (_.VT (_.LdFrag addr:$src1)),
(i32 imm:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
- (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (_.VT (_.LdFrag addr:$src1)),
(i32 imm:$src2))))]>,
- EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
@@ -2752,7 +2732,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2)))]>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
@@ -2762,7 +2742,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))))]>,
- EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2907,8 +2887,15 @@ let Predicates = [HasDQI] in {
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
+ def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
+ (KMOVWkm addr:$src)>;
}
+def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
+ SDTCVecEltisVT<1, i1>,
+ SDTCisPtrTy<2>]>>;
+
let Predicates = [HasAVX512] in {
multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
def : Pat<(maskVT (scalar_to_vector GR32:$src)),
@@ -2916,6 +2903,12 @@ let Predicates = [HasAVX512] in {
def : Pat<(maskVT (scalar_to_vector GR8:$src)),
(COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
+
+ def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
+
+ def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
+ (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
}
defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
@@ -3353,7 +3346,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
!if(NoRMPattern, [],
[(set _.RC:$dst,
- (_.VT (bitconvert (ld_frag addr:$src))))]),
+ (_.VT (ld_frag addr:$src)))]),
_.ExeDomain>, EVEX, Sched<[Sched.RM]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
@@ -3372,7 +3365,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT
(vselect _.KRCWM:$mask,
- (_.VT (bitconvert (ld_frag addr:$src1))),
+ (_.VT (ld_frag addr:$src1)),
(_.VT _.RC:$src0))))], _.ExeDomain>,
EVEX, EVEX_K, Sched<[Sched.RM]>;
}
@@ -3381,7 +3374,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
"${dst} {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
- (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
+ (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
_.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
}
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
@@ -3474,7 +3467,7 @@ multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
[], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
NotMemoryFoldable;
- def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
+ def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
(!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
_.KRCWM:$mask, _.RC:$src)>;
@@ -3681,6 +3674,20 @@ let Predicates = [HasBWI, NoVLX] in {
}
let Predicates = [HasAVX512] in {
+ // 512-bit load.
+ def : Pat<(alignedloadv16i32 addr:$src),
+ (VMOVDQA64Zrm addr:$src)>;
+ def : Pat<(alignedloadv32i16 addr:$src),
+ (VMOVDQA64Zrm addr:$src)>;
+ def : Pat<(alignedloadv64i8 addr:$src),
+ (VMOVDQA64Zrm addr:$src)>;
+ def : Pat<(loadv16i32 addr:$src),
+ (VMOVDQU64Zrm addr:$src)>;
+ def : Pat<(loadv32i16 addr:$src),
+ (VMOVDQU64Zrm addr:$src)>;
+ def : Pat<(loadv64i8 addr:$src),
+ (VMOVDQU64Zrm addr:$src)>;
+
// 512-bit store.
def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
@@ -3697,6 +3704,20 @@ let Predicates = [HasAVX512] in {
}
let Predicates = [HasVLX] in {
+ // 128-bit load.
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVDQA64Z128rm addr:$src)>;
+ def : Pat<(alignedloadv8i16 addr:$src),
+ (VMOVDQA64Z128rm addr:$src)>;
+ def : Pat<(alignedloadv16i8 addr:$src),
+ (VMOVDQA64Z128rm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVDQU64Z128rm addr:$src)>;
+ def : Pat<(loadv8i16 addr:$src),
+ (VMOVDQU64Z128rm addr:$src)>;
+ def : Pat<(loadv16i8 addr:$src),
+ (VMOVDQU64Z128rm addr:$src)>;
+
// 128-bit store.
def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
@@ -3711,6 +3732,20 @@ let Predicates = [HasVLX] in {
def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
+ // 256-bit load.
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVDQA64Z256rm addr:$src)>;
+ def : Pat<(alignedloadv16i16 addr:$src),
+ (VMOVDQA64Z256rm addr:$src)>;
+ def : Pat<(alignedloadv32i8 addr:$src),
+ (VMOVDQA64Z256rm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVDQU64Z256rm addr:$src)>;
+ def : Pat<(loadv16i16 addr:$src),
+ (VMOVDQU64Z256rm addr:$src)>;
+ def : Pat<(loadv32i8 addr:$src),
+ (VMOVDQU64Z256rm addr:$src)>;
+
// 256-bit store.
def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
@@ -4029,10 +4064,10 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
-def : Pat<(masked_store addr:$dst, Mask,
+def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (iPTR 0)))),
+ (iPTR 0))), addr:$dst, Mask),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
@@ -4044,10 +4079,10 @@ multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
dag Mask, RegisterClass MaskRC,
SubRegIndex subreg> {
-def : Pat<(masked_store addr:$dst, Mask,
+def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (iPTR 0)))),
+ (iPTR 0))), addr:$dst, Mask),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
@@ -4064,16 +4099,16 @@ multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
SubRegIndex subreg> {
// AVX512F pattern.
-def : Pat<(masked_store addr:$dst, Mask512,
+def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (iPTR 0)))),
+ (iPTR 0))), addr:$dst, Mask512),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// AVX512VL pattern.
-def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)),
+def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
@@ -4421,8 +4456,6 @@ let Predicates = [HasAVX512] in {
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
@@ -4497,7 +4530,7 @@ let Predicates = [HasAVX512] in {
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
@@ -4593,6 +4626,12 @@ let Predicates = [HasAVX512], AddedComplexity = 400 in {
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v8i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
+ def : Pat<(v16i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZrm addr:$src)>;
+ def : Pat<(v32i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZrm addr:$src)>;
+ def : Pat<(v64i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZrm addr:$src)>;
}
let Predicates = [HasVLX], AddedComplexity = 400 in {
@@ -4609,6 +4648,12 @@ let Predicates = [HasVLX], AddedComplexity = 400 in {
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
+ def : Pat<(v8i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ256rm addr:$src)>;
+ def : Pat<(v16i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ256rm addr:$src)>;
+ def : Pat<(v32i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
@@ -4623,6 +4668,12 @@ let Predicates = [HasVLX], AddedComplexity = 400 in {
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
+ def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ128rm addr:$src)>;
+ def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ128rm addr:$src)>;
+ def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ128rm addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -4641,10 +4692,9 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2))))>,
+ (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4659,7 +4709,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4773,9 +4823,9 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
- (bitconvert (_Src.LdFrag addr:$src2))))>,
+ (_Src.LdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
@@ -4786,20 +4836,20 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
(_Brdct.VT (X86VBroadcast
(_Brdct.ScalarLdFrag addr:$src2))))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
SchedWriteVecALU, 0>;
-defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
+defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
SchedWriteVecALU, HasBWI, 1>;
-defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
+defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
SchedWriteVecALU, HasBWI, 0>;
-defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
+defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
SchedWriteVecALU, HasBWI, 1>;
-defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
+defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
SchedWritePMULLD, HasAVX512, 1>, T8PD;
@@ -4859,7 +4909,7 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_Src.VT (X86VBroadcast
(_Src.ScalarLdFrag addr:$src2))))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
@@ -4878,9 +4928,9 @@ multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
- (bitconvert (_Src.LdFrag addr:$src2))))>,
+ (_Src.LdFrag addr:$src2)))>,
EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
@@ -5046,95 +5096,356 @@ let Predicates = [HasAVX512, NoVLX] in {
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
-// OpNodeMsk is the OpNode to use when element size is important. OpNode will
-// be set to null_frag for 32-bit elements.
-multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
- SDNode OpNodeMsk, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, bit IsCommutable = 0> {
- let hasSideEffects = 0 in
- defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
- "$src2, $src1", "$src1, $src2",
- (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
- (bitconvert (_.VT _.RC:$src2)))),
- (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
- _.RC:$src2)))),
- IsCommutable>, AVX512BIBase, EVEX_4V,
- Sched<[sched]>;
+defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
+ SchedWriteVecLogic, HasAVX512, 1>;
+defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
+ SchedWriteVecLogic, HasAVX512, 1>;
+defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
+ SchedWriteVecLogic, HasAVX512, 1>;
+defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
+ SchedWriteVecLogic, HasAVX512>;
- let hasSideEffects = 0, mayLoad = 1 in
- defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
- "$src2, $src1", "$src1, $src2",
- (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
- (bitconvert (_.LdFrag addr:$src2)))),
- (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2))))))>,
- AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+let Predicates = [HasVLX] in {
+ def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
+ (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
+ (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
+ (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
+ (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
+ (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
+ (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
+ (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
+ (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPORQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPORQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDDZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(or VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPORDZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(xor VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPXORDZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDQZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(or VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPORQZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(xor VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPXORQZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
+ (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
+ (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
+ (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
+ (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
+ (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
+ (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
+ (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
+ (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPORQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPORQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDDZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(or VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPORDZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(xor VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPXORDZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDQZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(or VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPORQZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(xor VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPXORQZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>;
}
-// OpNodeMsk is the OpNode to use where element size is important. So use
-// for all of the broadcast patterns.
-multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
- SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
- bit IsCommutable = 0> :
- avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
- IsCommutable> {
- defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
- (_.i64VT (OpNodeMsk _.RC:$src1,
- (bitconvert
- (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))))),
- (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
- (bitconvert
- (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))))>,
- AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+let Predicates = [HasAVX512] in {
+ def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
+ (VPANDQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
+ (VPANDQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
+ (VPORQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
+ (VPORQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
+ (VPXORQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
+ (VPXORQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
+ (VPANDNQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
+ (VPANDNQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPANDQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPANDQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPORQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPORQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPXORQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPXORQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPANDNQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPANDNQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(and VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDDZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(or VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPORDZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(xor VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPXORDZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDNDZrmb VR512:$src1, addr:$src2)>;
+
+ def : Pat<(and VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDQZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(or VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPORQZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(xor VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPXORQZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDNQZrmb VR512:$src1, addr:$src2)>;
+}
+
+// Patterns to catch vselect with different type than logic op.
+multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo IntInfo> {
+ // Masked register-register logical operations.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, _.RC:$src2)>;
+
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
+ _.RC:$src2)>;
+
+ // Masked register-memory logical operations.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
+ (load addr:$src2)))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
+ (load addr:$src2)))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
+ addr:$src2)>;
}
-multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
- SDNode OpNodeMsk, X86SchedWriteWidths sched,
- AVX512VLVectorVTInfo VTInfo,
- bit IsCommutable = 0> {
- let Predicates = [HasAVX512] in
- defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
- VTInfo.info512, IsCommutable>, EVEX_V512;
+multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo IntInfo> {
+ // Register-broadcast logical operations.
+ def : Pat<(IntInfo.VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))),
+ (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert
+ (IntInfo.VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert
+ (IntInfo.VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+}
- let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
- VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
- VTInfo.info128, IsCommutable>, EVEX_V128;
- }
+multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
+ AVX512VLVectorVTInfo SelectInfo,
+ AVX512VLVectorVTInfo IntInfo> {
+let Predicates = [HasVLX] in {
+ defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
+ IntInfo.info128>;
+ defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
+ IntInfo.info256>;
+}
+let Predicates = [HasAVX512] in {
+ defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
+ IntInfo.info512>;
+}
}
-multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
- SDNode OpNode, X86SchedWriteWidths sched,
- bit IsCommutable = 0> {
- defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
- avx512vl_i64_info, IsCommutable>,
- VEX_W, EVEX_CD8<64, CD8VF>;
- defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
- avx512vl_i32_info, IsCommutable>,
- EVEX_CD8<32, CD8VF>;
-}
-
-defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
- SchedWriteVecLogic, 1>;
-defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
- SchedWriteVecLogic, 1>;
-defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
- SchedWriteVecLogic, 1>;
-defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
- SchedWriteVecLogic>;
+multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
+ AVX512VLVectorVTInfo SelectInfo,
+ AVX512VLVectorVTInfo IntInfo> {
+let Predicates = [HasVLX] in {
+ defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
+ SelectInfo.info128, IntInfo.info128>;
+ defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
+ SelectInfo.info256, IntInfo.info256>;
+}
+let Predicates = [HasAVX512] in {
+ defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
+ SelectInfo.info512, IntInfo.info512>;
+}
+}
+
+multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
+ // i64 vselect with i32/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
+ avx512vl_i8_info>;
+
+ // i32 vselect with i64/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
+ avx512vl_i64_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
+ avx512vl_i8_info>;
+
+ // f32 vselect with i64/i32/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i64_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i8_info>;
+
+ // f64 vselect with i64/i32/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i64_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i8_info>;
+
+ defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
+ avx512vl_f32_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
+ avx512vl_f64_info,
+ avx512vl_i64_info>;
+}
+
+defm : avx512_logical_lowering_types<"VPAND", and>;
+defm : avx512_logical_lowering_types<"VPOR", or>;
+defm : avx512_logical_lowering_types<"VPXOR", xor>;
+defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
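For orientation, here is a rough sketch of what one of the instantiations above expands to. Taking the HasAVX512 case of avx512_logical_lowering_types<"VPAND", and> with the i64 select type and the i32 logic type, the first pattern in avx512_logical_lowering resolves to approximately the def below; the VR512/VK8WM bindings come from the usual X86VectorVTInfo fields and are inferred here rather than spelled out in this patch:

  // Approximate expansion (illustration only): a masked select typed as v8i64
  // wrapped around an AND that the DAG typed as v16i32 is folded into the
  // masked VPANDQ form.
  def : Pat<(v8i64 (vselect VK8WM:$mask,
                            (bitconvert (v16i32 (and VR512:$src1, VR512:$src2))),
                            VR512:$src0)),
            (VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;

This mirrors the comment above avx512_logical_lowering: the vselect and the logic op may carry different element types, so each logical instruction gets lowering patterns for every select/logic type pairing listed in avx512_logical_lowering_types.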
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@@ -5157,7 +5468,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
@@ -5171,7 +5482,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
@@ -5202,7 +5513,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -5217,7 +5528,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5285,7 +5596,7 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
@@ -5320,7 +5631,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
@@ -5328,7 +5639,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))))>,
EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
@@ -5439,73 +5750,6 @@ defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
-// Patterns catch floating point selects with bitcasted integer logic ops.
-multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
- X86VectorVTInfo _, Predicate prd> {
-let Predicates = [prd] in {
- // Masked register-register logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
- _.RC:$src1, _.RC:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
- _.RC:$src2)>;
- // Masked register-memory logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1,
- (load addr:$src2)))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
- _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
- addr:$src2)>;
- // Register-broadcast logical operations.
- def : Pat<(_.i64VT (OpNode _.RC:$src1,
- (bitconvert (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))))),
- (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert
- (_.i64VT (OpNode _.RC:$src1,
- (bitconvert (_.VT
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
- _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert
- (_.i64VT (OpNode _.RC:$src1,
- (bitconvert (_.VT
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
- _.RC:$src1, addr:$src2)>;
-}
-}
-
-multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
- defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
- defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
-}
-
-defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
-defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
-defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
-defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
-
let Predicates = [HasVLX,HasDQI] in {
// Use packed logical operations for scalar ops.
def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
@@ -5563,7 +5807,7 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
@@ -5571,7 +5815,7 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5588,7 +5832,7 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5635,18 +5879,15 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV)>,
+ (OpNode (and _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)>,
EVEX_4V, Sched<[sched]>;
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (bitconvert
- (_.i64VT (and _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2))))),
+ (OpNode (and _.RC:$src1, (_.LdFrag addr:$src2)),
_.ImmAllZerosV)>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Patterns for compare with 0 that just use the same source twice.
@@ -5671,13 +5912,13 @@ multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(_.ScalarLdFrag addr:$src2))),
_.ImmAllZerosV)>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Use 512bit version to implement 128/256 bit in case NoVLX.
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
X86VectorVTInfo _, string Name> {
- def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+ def : Pat<(_.KVT (OpNode (and _.RC:$src1, _.RC:$src2),
_.ImmAllZerosV)),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(Name # "Zrr")
@@ -5688,7 +5929,7 @@ multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
_.KRC))>;
def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+ (OpNode (and _.RC:$src1, _.RC:$src2),
_.ImmAllZerosV))),
(COPY_TO_REGCLASS
(!cast<Instruction>(Name # "Zrrk")
@@ -5765,7 +6006,7 @@ multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
v16i8x_info, NAME#"B">, EVEX_V128;
}
- let Predicates = [HasAVX512, NoVLX] in {
+ let Predicates = [HasBWI, NoVLX] in {
defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
@@ -5791,6 +6032,125 @@ defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
SchedWriteVecLogic>, T8XS;
+
+multiclass avx512_vptest_lowering_pats<string InstrStr, PatFrag OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo AndInfo> {
+ def : Pat<(_.KVT (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr # "rr") _.RC:$src1, _.RC:$src2)>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask,
+ (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV))),
+ (!cast<Instruction>(InstrStr # "rrk") _.KRC:$mask, _.RC:$src1,
+ _.RC:$src2)>;
+
+ def : Pat<(_.KVT (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1,
+ (AndInfo.LdFrag addr:$src2)))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr # "rm") _.RC:$src1, addr:$src2)>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask,
+ (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1,
+ (AndInfo.LdFrag addr:$src2)))),
+ _.ImmAllZerosV))),
+ (!cast<Instruction>(InstrStr # "rmk") _.KRC:$mask, _.RC:$src1,
+ addr:$src2)>;
+}
+
+// Patterns to use 512-bit instructions when 128/256 are not available.
+multiclass avx512_vptest_lowering_wide_pats<string InstrStr, PatFrag OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo AndInfo,
+ X86VectorVTInfo ExtendInfo> {
+ def : Pat<(_.KVT (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (_.KVT (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstrStr#"rr")
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src1, _.SubRegIdx),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src2, _.SubRegIdx)),
+ _.KRC))>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask,
+ (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstrStr#"rrk")
+ (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src1, _.SubRegIdx),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src2, _.SubRegIdx)),
+ _.KRC)>;
+}
+
+multiclass avx512_vptest_lowering_sizes<string InstrStr, PatFrag OpNode,
+ Predicate prd,
+ AVX512VLVectorVTInfo CmpInfo,
+ AVX512VLVectorVTInfo AndInfo> {
+let Predicates = [prd, HasVLX] in {
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z128", OpNode,
+ CmpInfo.info128, AndInfo.info128>;
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z256", OpNode,
+ CmpInfo.info256, AndInfo.info256>;
+}
+let Predicates = [prd] in {
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z", OpNode,
+ CmpInfo.info512, AndInfo.info512>;
+}
+
+let Predicates = [prd, NoVLX] in {
+ defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
+ CmpInfo.info128, AndInfo.info128,
+ CmpInfo.info512>;
+ defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
+ CmpInfo.info256, AndInfo.info256,
+ CmpInfo.info512>;
+}
+}
+
+multiclass avx512_vptest_lowering_types<string InstrStr, PatFrag OpNode> {
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ avx512vl_i8_info, avx512vl_i16_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ avx512vl_i8_info, avx512vl_i32_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ avx512vl_i8_info, avx512vl_i64_info>;
+
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ avx512vl_i16_info, avx512vl_i8_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ avx512vl_i16_info, avx512vl_i32_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ avx512vl_i16_info, avx512vl_i64_info>;
+
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ avx512vl_i32_info, avx512vl_i8_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ avx512vl_i32_info, avx512vl_i16_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ avx512vl_i32_info, avx512vl_i64_info>;
+
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ avx512vl_i64_info, avx512vl_i8_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ avx512vl_i64_info, avx512vl_i16_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ avx512vl_i64_info, avx512vl_i32_info>;
+}
+
+defm : avx512_vptest_lowering_types<"VPTESTM", X86pcmpnem>;
+defm : avx512_vptest_lowering_types<"VPTESTNM", X86pcmpeqm>;
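Similarly, a rough sketch of what one avx512_vptest_lowering_pats instantiation produces. For avx512_vptest_lowering_types<"VPTESTM", X86pcmpnem> with byte compares (v16i8x_info) and a dword-typed AND (v4i32x_info) under HasBWI and HasVLX, the first pattern comes out roughly as the def below; the VR128X register class and the (v16i8 immAllZerosV) rendering of ImmAllZerosV are inferred from the usual X86VectorVTInfo definitions rather than shown in this patch:

  // Approximate expansion (illustration only): vptestmb still matches when the
  // AND feeding the compare-with-zero was typed as v4i32 in the DAG.
  def : Pat<(v16i1 (X86pcmpnem (bitconvert
                                 (v4i32 (and VR128X:$src1, VR128X:$src2))),
                               (v16i8 immAllZerosV))),
            (VPTESTMBZ128rr VR128X:$src1, VR128X:$src2)>;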
+
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
@@ -5807,7 +6167,7 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
(i8 imm:$src2)))>,
Sched<[sched.Folded]>;
}
@@ -5826,7 +6186,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, ValueType SrcVT,
- PatFrag bc_frag, X86VectorVTInfo _> {
+ X86VectorVTInfo _> {
// src2 is always 128-bit
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5837,26 +6197,26 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
+ (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
AVX512BIBase,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, ValueType SrcVT,
- PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo VTInfo,
Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
- bc_frag, VTInfo.info512>, EVEX_V512,
+ VTInfo.info512>, EVEX_V512,
EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
- bc_frag, VTInfo.info256>, EVEX_V256,
+ VTInfo.info256>, EVEX_V256,
EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
- bc_frag, VTInfo.info128>, EVEX_V128,
+ VTInfo.info128>, EVEX_V128,
EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
}
}
@@ -5866,12 +6226,12 @@ multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
X86SchedWriteWidths sched,
bit NotEVEX2VEXConvertibleQ = 0> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
- bc_v4i32, avx512vl_i32_info, HasAVX512>;
+ avx512vl_i32_info, HasAVX512>;
let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
- bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
+ avx512vl_i64_info, HasAVX512>, VEX_W;
defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
- bc_v2i64, avx512vl_i16_info, HasBWI>;
+ avx512vl_i16_info, HasBWI>;
}
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
@@ -5991,9 +6351,9 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
- (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
+ (_.VT (_.LdFrag addr:$src2))))>,
AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6007,7 +6367,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))>,
AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -6091,7 +6451,7 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
_.RC:$src2)>;
- def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
+ def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rm)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
@@ -6099,7 +6459,7 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
(!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
+ (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
@@ -6108,7 +6468,7 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
(!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
+ (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
@@ -6333,9 +6693,9 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode
_.RC:$src1,
- (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
+ (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
@@ -6345,7 +6705,7 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
(Ctrl.VT (X86VBroadcast
(Ctrl.ScalarLdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
@@ -6448,7 +6808,7 @@ multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
(OpNode _.RC:$src1,
(_.VT (bitconvert
(v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
@@ -6524,7 +6884,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6532,7 +6892,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
_.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6598,7 +6958,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6607,7 +6967,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode _.RC:$src2,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6675,7 +7035,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns. This helps tablegen's duplicate pattern detection.
@@ -6685,7 +7045,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6750,7 +7110,7 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
@@ -6767,7 +7127,7 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
+ [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
@@ -7069,7 +7429,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -7078,7 +7438,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src2,
(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
_.RC:$src1)>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
} // Constraints = "$src1 = $dst"
@@ -7120,7 +7480,7 @@ multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched
def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
let isCodeGenOnly = 1 in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
@@ -7139,7 +7499,7 @@ multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched
(OpNode (DstVT.VT DstVT.RC:$src1),
(ld_frag addr:$src2),
(i32 FROUND_CURRENT)))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}//isCodeGenOnly = 1
}
@@ -7246,26 +7606,26 @@ def : Pat<(f64 (uint_to_fp GR64:$src)),
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
+ SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
string aliasStr,
bit CodeGenOnly = 1> {
let Predicates = [HasAVX512] in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
+ [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
EVEX, VEX_LIG, Sched<[sched]>;
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
- [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
+ [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
EVEX, VEX_LIG, EVEX_B, EVEX_RC,
Sched<[sched]>;
let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
- (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
- (i32 FROUND_CURRENT)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
@@ -7276,9 +7636,10 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
+ SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
string aliasStr> :
- avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> {
+ avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, OpNodeRnd, sched, asm, aliasStr, 0> {
let Predicates = [HasAVX512] in {
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
@@ -7287,52 +7648,31 @@ multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
}
// Convert float/double to signed/unsigned int 32/64
-defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
- X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">,
+defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
- X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">,
+defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
- X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">,
+defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
- X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">,
+defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
- X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">,
+defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
- X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">,
+defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
- X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">,
+defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
- X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">,
+defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-// The SSE version of these instructions are disabled for AVX512.
-// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
-let Predicates = [HasAVX512] in {
- def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
- (VCVTSS2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
- (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
- def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
- (VCVTSS2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
- (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
- (VCVTSD2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
- (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
- (VCVTSD2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
- (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
-} // HasAVX512
-
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
@@ -7420,8 +7760,9 @@ def : Pat<(v2f64 (X86Movsd
// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
- SDNode OpNodeRnd, X86FoldableSchedWrite sched,
- string aliasStr, bit CodeGenOnly = 1>{
+ SDNode OpNodeInt, SDNode OpNodeRnd,
+ X86FoldableSchedWrite sched, string aliasStr,
+ bit CodeGenOnly = 1>{
let Predicates = [HasAVX512] in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
@@ -7431,13 +7772,12 @@ let Predicates = [HasAVX512] in {
def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
- EVEX, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
- (i32 FROUND_CURRENT)))]>,
+ [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
EVEX, VEX_LIG, Sched<[sched]>;
def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
@@ -7448,10 +7788,9 @@ let Predicates = [HasAVX512] in {
def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
(ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set _DstRC.RC:$dst, (OpNodeRnd
- (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
- (i32 FROUND_CURRENT)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ [(set _DstRC.RC:$dst,
+ (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
@@ -7463,9 +7802,10 @@ let Predicates = [HasAVX512] in {
multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
- SDNode OpNodeRnd, X86FoldableSchedWrite sched,
+ SDNode OpNodeInt, SDNode OpNodeRnd,
+ X86FoldableSchedWrite sched,
string aliasStr> :
- avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched,
+ avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeInt, OpNodeRnd, sched,
aliasStr, 0> {
let Predicates = [HasAVX512] in {
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
@@ -7475,49 +7815,30 @@ let Predicates = [HasAVX512] in {
}
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">,
- XS, EVEX_CD8<32, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
+ "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">,
- VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
+ "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">,
- XD, EVEX_CD8<64, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
+ "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">,
- VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
+ "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">,
- XS, EVEX_CD8<32, CD8VT1>;
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
+ "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">,
- XS,VEX_W, EVEX_CD8<32, CD8VT1>;
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
+ "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">,
- XD, EVEX_CD8<64, CD8VT1>;
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
+ "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">,
- XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-
-let Predicates = [HasAVX512] in {
- def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
- (VCVTTSS2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
- (VCVTTSS2SIZrm_Int ssmem:$src)>;
- def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
- (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
- (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
- (VCVTTSD2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
- (VCVTTSD2SIZrm_Int sdmem:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
- (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
- (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
-} // HasAVX512
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
+ "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
@@ -7540,7 +7861,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(_Src.VT _Src.ScalarIntMemCPat:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -7551,7 +7872,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -7648,26 +7969,53 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
- string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
+ string Alias = "", X86MemOperand MemOp = _Src.MemOp,
+ RegisterClass MaskRC = _.KRCWM> {
- defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
- (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
+ defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src),
+ (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
+ (ins MaskRC:$mask, _Src.RC:$src),
+ OpcodeStr, "$src", "$src",
+ (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+ (vselect MaskRC:$mask,
+ (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
EVEX, Sched<[sched]>;
- defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
+ defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins MemOp:$src),
+ (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
+ (ins MaskRC:$mask, MemOp:$src),
+ OpcodeStr#Alias, "$src", "$src",
(_.VT (OpNode (_Src.VT
- (bitconvert (_Src.LdFrag addr:$src)))))>,
+ (_Src.LdFrag addr:$src)))),
+ (vselect MaskRC:$mask,
+ (_.VT (OpNode (_Src.VT
+ (_Src.LdFrag addr:$src)))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
EVEX, Sched<[sched.Folded]>;
- defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _Src.ScalarMemOp:$src), OpcodeStr,
+ defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _Src.ScalarMemOp:$src),
+ (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
+ (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
+ OpcodeStr,
"${src}"##Broadcast, "${src}"##Broadcast,
(_.VT (OpNode (_Src.VT
(X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
- ))>, EVEX, EVEX_B,
- Sched<[sched.Folded]>;
+ )),
+ (vselect MaskRC:$mask,
+ (_.VT
+ (OpNode
+ (_Src.VT
+ (X86VBroadcast
+ (_Src.ScalarLdFrag addr:$src))))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
+ EVEX, EVEX_B, Sched<[sched.Folded]>;
}
 // Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -7718,7 +8066,8 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
- X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -7752,6 +8101,35 @@ let Predicates = [HasVLX] in {
(VCVTPS2PDZ128rm addr:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDZ256rm addr:$src)>;
+
+ // Special patterns to allow use of X86vmfpround for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(X86vfpround (v2f64 VR128X:$src)),
+ (VCVTPD2PSZ128rr VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(X86vfpround (loadv2f64 addr:$src)),
+ (VCVTPD2PSZ128rm addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
+ (VCVTPD2PSZ128rmb addr:$src)>;
+ def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4f32 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4f32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// Convert Signed/Unsigned Doubleword to Double
@@ -7836,7 +8214,8 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
- OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -7865,8 +8244,9 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
- sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -8149,6 +8529,122 @@ let Predicates = [HasVLX] in {
(VCVTTPD2UDQZ256rr VR256X:$src)>;
def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
(VCVTTPD2UDQZ256rm addr:$src)>;
+
+ // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
+ (VCVTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
+ (VCVTPD2DQZ128rm addr:$src)>;
+ def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTPD2DQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86mcvttp2si for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
+ (VCVTTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
+ (VCVTTPD2DQZ128rm addr:$src)>;
+ def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTTPD2DQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
+ (VCVTPD2UDQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
+ (VCVTPD2UDQZ128rm addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTPD2UDQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
+ (VCVTTPD2UDQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
+ (VCVTTPD2UDQZ128rm addr:$src)>;
+ def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTTPD2UDQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI] in {
@@ -8365,8 +8861,7 @@ multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
(ins x86memop:$src), "vcvtph2ps", "$src", "$src",
(X86cvtph2ps (_src.VT
- (bitconvert
- (ld_frag addr:$src))))>,
+ (ld_frag addr:$src)))>,
T8PD, Sched<[sched.Folded]>;
}
@@ -8381,17 +8876,17 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
}
let Predicates = [HasAVX512] in
- defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
+ defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
WriteCvtPH2PSZ>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
- loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
+ load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
- loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
+ load, WriteCvtPH2PS>, EVEX, EVEX_V128,
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
@@ -8406,12 +8901,28 @@ let Predicates = [HasVLX] in {
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
- defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
- (ins _src.RC:$src1, i32u8imm:$src2),
- "vcvtps2ph", "$src2, $src1", "$src1, $src2",
- (X86cvtps2ph (_src.VT _src.RC:$src1),
- (i32 imm:$src2)), 0, 0>,
- AVX512AIi8Base, Sched<[RR]>;
+let ExeDomain = GenericDomain in {
+ def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
+ Sched<[RR]>;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+ _dest.RC:$src0, _src.KRCWM:$mask))]>,
+ Sched<[RR]>, EVEX_K;
+ def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+ _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
+ Sched<[RR]>, EVEX_KZ;
let hasSideEffects = 0, mayStore = 1 in {
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
@@ -8423,6 +8934,7 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
EVEX_K, Sched<[MR]>, NotMemoryFoldable;
}
}
+}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
SchedWrite Sched> {
@@ -8483,7 +8995,7 @@ let Predicates = [HasVLX] in {
(v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
-// Unordered/Ordered scalar fp compare with Sea and set EFLAGS
+// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
@@ -8549,7 +9061,7 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
_.ScalarIntMemCPat:$src2)>, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8578,13 +9090,13 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
- EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8638,7 +9150,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8674,7 +9186,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(OpNode (_.VT
(bitconvert (_.LdFrag addr:$src))),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
@@ -8682,7 +9194,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(OpNode (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
(i32 FROUND_CURRENT))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -8749,13 +9261,13 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(fsqrt (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(fsqrt (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
- EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8810,7 +9322,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
(X86fsqrtRnds (_.VT _.RC:$src1),
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -8828,7 +9340,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8881,7 +9393,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
_.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -8893,7 +9405,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9000,14 +9512,47 @@ defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
// Integer truncate and extend operations
//-------------------------------------------------
+// PatFrags that contain a select and a truncate op. They take operands in the
+// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
+// either to the multiclasses.
+def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect node:$mask,
+ (trunc node:$src), node:$src0)>;
+def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect node:$mask,
+ (X86vtruncs node:$src), node:$src0)>;
+def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect node:$mask,
+ (X86vtruncus node:$src), node:$src0)>;
+
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
- let ExeDomain = DestInfo.ExeDomain in
- defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
- (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
- (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
- EVEX, T8XS, Sched<[sched]>;
+ let ExeDomain = DestInfo.ExeDomain in {
+ def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst|$dst, $src}",
+ [(set DestInfo.RC:$dst,
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
+ EVEX, Sched<[sched]>;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
+ (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ [(set DestInfo.RC:$dst,
+ (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
+ (DestInfo.VT DestInfo.RC:$src0),
+ SrcInfo.KRCWM:$mask))]>,
+ EVEX, EVEX_K, Sched<[sched]>;
+ def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ [(set DestInfo.RC:$dst,
+ (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
+ DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
+ EVEX, EVEX_KZ, Sched<[sched]>;
+ }
let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
def mr : AVX512XS8I<opc, MRMDestMem, (outs),
@@ -9031,14 +9576,18 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
(!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
addr:$dst, SrcInfo.RC:$src)>;
- def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
- (SrcInfo.VT SrcInfo.RC:$src)),
+ def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
+ SrcInfo.KRCWM:$mask),
(!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
- SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
+ SDNode OpNode256, SDNode OpNode512,
+ SDPatternOperator MaskNode128,
+ SDPatternOperator MaskNode256,
+ SDPatternOperator MaskNode512,
+ X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTSrcInfo,
X86VectorVTInfo DestInfoZ128,
X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
@@ -9047,118 +9596,167 @@ multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
PatFrag mtruncFrag, Predicate prd = HasAVX512>{
let Predicates = [HasVLX, prd] in {
- defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
+ defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
truncFrag, mtruncFrag, NAME>, EVEX_V128;
- defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
+ defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
truncFrag, mtruncFrag, NAME>, EVEX_V256;
}
let Predicates = [prd] in
- defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
+ defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
VTSrcInfo.info512, DestInfoZ, x86memopZ>,
avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
+ InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
avx512vl_i64_info, v16i8x_info, v16i8x_info,
v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
+ InVecMaskNode, InVecMaskNode, MaskNode, sched,
avx512vl_i64_info, v8i16x_info, v8i16x_info,
v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
+ InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i64_info, v4i32x_info, v4i32x_info,
v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
+ InVecMaskNode, InVecMaskNode, MaskNode, sched,
avx512vl_i32_info, v16i8x_info, v16i8x_info,
v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
+ InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i32_info, v8i16x_info, v8i16x_info,
v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
- sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
+ InVecMaskNode, MaskNode, MaskNode, sched,
+ avx512vl_i16_info, v16i8x_info, v16i8x_info,
v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
-defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256,
- truncstorevi8, masked_truncstorevi8, X86vtrunc>;
-defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256,
- truncstore_s_vi8, masked_truncstore_s_vi8>;
-defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8>;
-
-defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256,
- truncstorevi16, masked_truncstorevi16, X86vtrunc>;
-defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256,
- truncstore_s_vi16, masked_truncstore_s_vi16>;
-defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
- truncstore_us_vi16, masked_truncstore_us_vi16>;
-
-defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256,
- truncstorevi32, masked_truncstorevi32, X86vtrunc>;
-defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256,
- truncstore_s_vi32, masked_truncstore_s_vi32>;
-defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
- truncstore_us_vi32, masked_truncstore_us_vi32>;
-
-defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
- truncstorevi8, masked_truncstorevi8, X86vtrunc>;
-defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256,
- truncstore_s_vi8, masked_truncstore_s_vi8>;
-defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8>;
-
-defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
- truncstorevi16, masked_truncstorevi16, X86vtrunc>;
-defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256,
- truncstore_s_vi16, masked_truncstore_s_vi16>;
-defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256,
- truncstore_us_vi16, masked_truncstore_us_vi16>;
-
-defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
- truncstorevi8, masked_truncstorevi8, X86vtrunc>;
-defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256,
- truncstore_s_vi8, masked_truncstore_s_vi8>;
-defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8>;
+defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
+ WriteShuffle256, truncstorevi8,
+ masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
+defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi8,
+ masked_truncstore_s_vi8, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi8, masked_truncstore_us_vi8,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
+ WriteShuffle256, truncstorevi16,
+ masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
+defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi16,
+ masked_truncstore_s_vi16, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi16, masked_truncstore_us_vi16,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
+ WriteShuffle256, truncstorevi32,
+ masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
+defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi32,
+ masked_truncstore_s_vi32, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi32, masked_truncstore_us_vi32,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
+ WriteShuffle256, truncstorevi8,
+ masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
+defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi8,
+ masked_truncstore_s_vi8, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi8, masked_truncstore_us_vi8,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
+ WriteShuffle256, truncstorevi16,
+ masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
+defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi16,
+ masked_truncstore_s_vi16, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi16, masked_truncstore_us_vi16,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
+ WriteShuffle256, truncstorevi8,
+ masked_truncstorevi8, X86vtrunc,
+ X86vmtrunc>;
+defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi8,
+ masked_truncstore_s_vi8, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi8, masked_truncstore_us_vi8,
+ X86vtruncus, X86vmtruncus>;
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
@@ -9177,6 +9775,44 @@ def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
VR256X:$src, sub_ymm))), sub_xmm))>;
}
+// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
+multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
+ X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo> {
+ def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
+ DestInfo.RC:$src0,
+ SrcInfo.KRCWM:$mask)),
+ (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
+ SrcInfo.KRCWM:$mask,
+ SrcInfo.RC:$src)>;
+
+ def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
+ DestInfo.ImmAllZerosV,
+ SrcInfo.KRCWM:$mask)),
+ (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
+ SrcInfo.RC:$src)>;
+}
+
+let Predicates = [HasVLX] in {
+defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
+defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
+defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
+}
+
+let Predicates = [HasAVX512] in {
+defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
+
+defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
+
+defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
+defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
+defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
+}
+
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
@@ -9221,7 +9857,7 @@ multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
- v16i8x_info, i64mem, LdFrag, OpNode>,
+ v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
@@ -9240,12 +9876,12 @@ multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
- v16i8x_info, i32mem, LdFrag, OpNode>,
+ v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
- v16i8x_info, i64mem, LdFrag, OpNode>,
+ v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
}
}
@@ -9278,7 +9914,7 @@ multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
- v8i16x_info, i64mem, LdFrag, OpNode>,
+ v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
@@ -9308,23 +9944,107 @@ multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
}
}
-defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
+
+defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
+
-defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;
+// Patterns for which we also need any-extend versions. aext_vector_inreg
+// is currently legalized to zext_vector_inreg.
+multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
+ // 256-bit patterns
+ let Predicates = [HasVLX, HasBWI] in {
+ def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ }
+
+ let Predicates = [HasVLX] in {
+ def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ }
+
+ // 512-bit patterns
+ let Predicates = [HasBWI] in {
+ def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
+ }
+ let Predicates = [HasAVX512] in {
+ def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
+ def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
+ (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
+ }
+}
+
+multiclass AVX512_pmovx_patterns_aext<string OpcPrefix, SDNode ExtOp> :
+ AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
+ let Predicates = [HasVLX, HasBWI] in {
+ def : Pat<(v16i16 (ExtOp (v16i8 VR128X:$src))),
+ (!cast<I>(OpcPrefix#BWZ256rr) VR128X:$src)>;
+ }
+
+ let Predicates = [HasVLX] in {
+ def : Pat<(v8i32 (ExtOp (v8i16 VR128X:$src))),
+ (!cast<I>(OpcPrefix#WDZ256rr) VR128X:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (v4i32 VR128X:$src))),
+ (!cast<I>(OpcPrefix#DQZ256rr) VR128X:$src)>;
+ }
+
+ // 512-bit patterns
+ let Predicates = [HasBWI] in {
+ def : Pat<(v32i16 (ExtOp (v32i8 VR256X:$src))),
+ (!cast<I>(OpcPrefix#BWZrr) VR256X:$src)>;
+ }
+ let Predicates = [HasAVX512] in {
+ def : Pat<(v16i32 (ExtOp (v16i8 VR128X:$src))),
+ (!cast<I>(OpcPrefix#BDZrr) VR128X:$src)>;
+ def : Pat<(v16i32 (ExtOp (v16i16 VR256X:$src))),
+ (!cast<I>(OpcPrefix#WDZrr) VR256X:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (v8i16 VR128X:$src))),
+ (!cast<I>(OpcPrefix#WQZrr) VR128X:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (v8i32 VR256X:$src))),
+ (!cast<I>(OpcPrefix#DQZrr) VR256X:$src)>;
+ }
+}
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
- SDNode InVecOp> {
+ SDNode InVecOp> :
+ AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
// 128-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -9335,7 +10055,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
@@ -9345,7 +10065,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
@@ -9354,7 +10074,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -9365,7 +10085,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
@@ -9374,7 +10094,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -9385,87 +10105,73 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
- // 256-bit patterns
- let Predicates = [HasVLX, HasBWI] in {
- def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- }
let Predicates = [HasVLX] in {
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
}
// 512-bit patterns
- let Predicates = [HasBWI] in {
- def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
- }
let Predicates = [HasAVX512] in {
- def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
-
- def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
- def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+ }
+}
- def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
-
- def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
+defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
+defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
+defm : AVX512_pmovx_patterns_aext<"VPMOVZX", anyext>;
- def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
- }
+// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
+// ext+trunc aggressively, making it impossible to legalize the DAG to this
+// pattern directly.
+let Predicates = [HasAVX512, NoBWI] in {
+def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
+ (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
+def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))),
+ (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
+def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst),
+ (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
}
-defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
-defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
@@ -9651,6 +10357,10 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
+
+// Also need a pattern for anyext.
+def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
+ (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
}
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
@@ -9724,6 +10434,19 @@ let Predicates = [HasDQI, NoBWI] in {
(VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
(VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+
+ def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
+ (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+ def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
+ (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+}
+
+let Predicates = [HasDQI, NoBWI, HasVLX] in {
+ def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
+ (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
+
+ def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
+ (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
//===----------------------------------------------------------------------===//
@@ -9753,8 +10476,7 @@ multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
}
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
- def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
- (_.VT _.RC:$src)),
+ def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix##mrk)
addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
@@ -9798,7 +10520,7 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
(_.VT (X86expand (_.VT (bitconvert
(_.LdFrag addr:$src1)))))>,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
@@ -9860,14 +10582,14 @@ multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNo
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr##", $src2",
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9923,7 +10645,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
(i32 imm:$src3))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
@@ -9931,7 +10653,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9955,7 +10677,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9974,7 +10696,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -9996,7 +10718,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10360,9 +11082,9 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(_.VT
(bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2)),
+ (CastInfo.LdFrag addr:$src2),
(i8 imm:$src3)))))>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
@@ -10374,7 +11096,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(X86Shuf128 _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
(i8 imm:$src3)))))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10450,7 +11172,7 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
(_.VT (X86VAlign _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)),
(i8 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<"VPALIGNRrmi">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -10460,7 +11182,7 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
(X86VAlign _.RC:$src1,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10526,7 +11248,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
- (bitconvert (To.LdFrag addr:$src2)),
+ (From.LdFrag addr:$src2),
imm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
@@ -10536,7 +11258,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
- (bitconvert (To.LdFrag addr:$src2)),
+ (From.LdFrag addr:$src2),
imm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
@@ -10824,6 +11546,8 @@ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
(VMOVDDUPZ128rm addr:$src)>;
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+ (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(v2f64 VR128X:$src0)),
@@ -10954,7 +11678,7 @@ multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
- EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -11027,7 +11751,7 @@ multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
[(set _.RC:$dst,(_.VT (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
@@ -11067,7 +11791,7 @@ multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
(OpNode (_src.VT _src.RC:$src1),
(_src.VT (bitconvert
(_src.LdFrag addr:$src2))))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
@@ -11169,7 +11893,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (bitconvert (_.LdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
@@ -11179,7 +11903,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
@@ -11343,19 +12067,68 @@ defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
let Predicates = [HasAVX512] in {
- def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
+ def : Pat<(xor VR512:$src, (bc_v64i8 (v16i32 immAllOnesV))),
+ (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
+ def : Pat<(xor VR512:$src, (bc_v32i16 (v16i32 immAllOnesV))),
+ (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
+ def : Pat<(xor VR512:$src, (bc_v16i32 (v16i32 immAllOnesV))),
+ (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
+ def : Pat<(xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV))),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
let Predicates = [HasAVX512, NoVLX] in {
- def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
+ def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (i8 15)), sub_xmm)>;
+ def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (i8 15)), sub_xmm)>;
+ def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (i8 15)), sub_xmm)>;
+ def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
+
+ def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (i8 15)), sub_ymm)>;
+ def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (i8 15)), sub_ymm)>;
+ def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (i8 15)), sub_ymm)>;
+ def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
@@ -11365,9 +12138,22 @@ let Predicates = [HasAVX512, NoVLX] in {
}
let Predicates = [HasVLX] in {
- def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
+ def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+ (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
+ def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+ (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
+ def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+ (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
+ def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
+
+ def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+ (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
+ def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+ (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
+ def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+ (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
+ def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
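The blocks above extend the all-ones XOR folds so a vector NOT of any element width selects VPTERNLOGQ with immediate 15 (0x0F), a ternary truth table whose output is 1 exactly when the first source bit is 0; with all three sources tied to the same register that is simply the bitwise complement of the input. A minimal sketch of where such a DAG comes from, assuming an already-positioned IRBuilder (the helper name is illustrative, not part of these files):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Illustrative helper only: CreateNot on a vector value emits
// `xor %v, <all ones>`, which reaches instruction selection as
// (xor vNiM:$src, (bitcast immAllOnesV)), the exact shape the new
// bc_v16i8/bc_v8i16/... patterns above now catch for every element type.
static Value *vectorNot(IRBuilder<> &B, Value *Vec) {
  return B.CreateNot(Vec); // may select VPTERNLOGQ ..., 15 under HasAVX512
}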
@@ -11395,7 +12181,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
@@ -11405,7 +12191,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
(TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
}
@@ -11448,7 +12234,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_src3VT.VT _src3VT.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_NO_EXC))>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
@@ -11458,7 +12244,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_src3VT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -11680,9 +12466,9 @@ multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
+ (VTI.VT (VTI.LdFrag addr:$src3))))>,
AVX512FMA3Base,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -11698,7 +12484,7 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
@@ -11783,10 +12569,9 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (bitconvert
- (VTI.LdFrag addr:$src3)))))>,
+ (VTI.VT (VTI.LdFrag addr:$src3))))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
@@ -11795,7 +12580,7 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(VTI.VT (X86VBroadcast
(VTI.ScalarLdFrag addr:$src3))))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
- T8PD, Sched<[sched.Folded, ReadAfterLd]>;
+ T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
@@ -11840,9 +12625,9 @@ multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
- (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
+ (VTI.VT (VTI.LdFrag addr:$src2)))>,
EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
@@ -11890,7 +12675,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
(OpNode (VTI.VT VTI.RC:$src1),
(bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
index c444fa761960..cb5a4e5b5d41 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -46,11 +46,11 @@ def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
// (and possibly third) value from a register.
// This is used for instructions that put the memory operands before other
// uses.
-class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded,
// Memory operand.
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// Register reads (implicit or explicit).
- ReadAfterLd, ReadAfterLd]>;
+ Sched.ReadAfterFold, Sched.ReadAfterFold]>;
// Extra precision multiplication
@@ -63,18 +63,18 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>, Sched<[WriteIMul]>;
+ (implicit EFLAGS)]>, Sched<[WriteIMul8]>;
// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX], hasSideEffects = 0 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
- []>, OpSize16, Sched<[WriteIMul]>;
+ []>, OpSize16, Sched<[WriteIMul16]>;
// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], hasSideEffects = 0 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
"mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>,
- OpSize32, Sched<[WriteIMul]>;
+ OpSize32, Sched<[WriteIMul32]>;
// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
@@ -89,20 +89,20 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>, SchedLoadReg<WriteIMul.Folded>;
+ (implicit EFLAGS)]>, SchedLoadReg<WriteIMul8>;
// AX,DX = AX*[mem16]
let mayLoad = 1, hasSideEffects = 0 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>;
+ "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>;
+ "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>,
+ "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64>,
Requires<[In64BitMode]>;
}
@@ -110,15 +110,15 @@ let hasSideEffects = 0 in {
// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>,
- Sched<[WriteIMul]>;
+ Sched<[WriteIMul8]>;
// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize16, Sched<[WriteIMul]>;
+ OpSize16, Sched<[WriteIMul16]>;
// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>,
- OpSize32, Sched<[WriteIMul]>;
+ OpSize32, Sched<[WriteIMul32]>;
// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>,
@@ -128,19 +128,19 @@ let mayLoad = 1 in {
// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>, SchedLoadReg<WriteIMul.Folded>;
+ "imul{b}\t$src", []>, SchedLoadReg<WriteIMul8>;
// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>;
+ "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>;
+ "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>,
+ "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64>,
Requires<[In64BitMode]>;
}
} // hasSideEffects
@@ -156,18 +156,18 @@ def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, GR16:$src2))]>,
- Sched<[WriteIMul]>, TB, OpSize16;
+ Sched<[WriteIMul16Reg]>, TB, OpSize16;
def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, GR32:$src2))]>,
- Sched<[WriteIMul]>, TB, OpSize32;
+ Sched<[WriteIMul32Reg]>, TB, OpSize32;
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, GR64:$src2))]>,
- Sched<[WriteIMul64]>, TB;
+ Sched<[WriteIMul64Reg]>, TB;
} // isCommutable
// Register-Memory Signed Integer Multiply
@@ -176,19 +176,19 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, (loadi16 addr:$src2)))]>,
- Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize16;
+ Sched<[WriteIMul16Reg.Folded, WriteIMul16Reg.ReadAfterFold]>, TB, OpSize16;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, (loadi32 addr:$src2)))]>,
- Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize32;
+ Sched<[WriteIMul32Reg.Folded, WriteIMul32Reg.ReadAfterFold]>, TB, OpSize32;
def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, (loadi64 addr:$src2)))]>,
- Sched<[WriteIMul64.Folded, ReadAfterLd]>, TB;
+ Sched<[WriteIMul64Reg.Folded, WriteIMul64Reg.ReadAfterFold]>, TB;
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
@@ -201,37 +201,37 @@ def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, imm:$src2))]>,
- Sched<[WriteIMul]>, OpSize16;
+ Sched<[WriteIMul16Imm]>, OpSize16;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
- Sched<[WriteIMul]>, OpSize16;
+ Sched<[WriteIMul16Imm]>, OpSize16;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, imm:$src2))]>,
- Sched<[WriteIMul]>, OpSize32;
+ Sched<[WriteIMul32Imm]>, OpSize32;
def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, i32immSExt8:$src2))]>,
- Sched<[WriteIMul]>, OpSize32;
+ Sched<[WriteIMul32Imm]>, OpSize32;
def IMUL64rri32 : RIi32S<0x69, MRMSrcReg, // GR64 = GR64*I32
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt32:$src2))]>,
- Sched<[WriteIMul64]>;
+ Sched<[WriteIMul64Imm]>;
def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))]>,
- Sched<[WriteIMul64]>;
+ Sched<[WriteIMul64Imm]>;
// Memory-Integer Signed Integer Multiply
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
@@ -239,41 +239,41 @@ def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (loadi16 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize16;
+ Sched<[WriteIMul16Imm.Folded]>, OpSize16;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (loadi16 addr:$src1),
i16immSExt8:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize16;
+ Sched<[WriteIMul16Imm.Folded]>, OpSize16;
def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
(outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (loadi32 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize32;
+ Sched<[WriteIMul32Imm.Folded]>, OpSize32;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (loadi32 addr:$src1),
i32immSExt8:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize32;
+ Sched<[WriteIMul32Imm.Folded]>, OpSize32;
def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem, // GR64 = [mem64]*I32
(outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (loadi64 addr:$src1),
i64immSExt32:$src2))]>,
- Sched<[WriteIMul64.Folded]>;
+ Sched<[WriteIMul64Imm.Folded]>;
def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (loadi64 addr:$src1),
i64immSExt8:$src2))]>,
- Sched<[WriteIMul64.Folded]>;
+ Sched<[WriteIMul64Imm.Folded]>;
} // Defs = [EFLAGS]
// unsigned division/remainder
@@ -295,17 +295,17 @@ def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>, SchedLoadReg<WriteDiv8.Folded>;
+ "div{b}\t$src", []>, SchedLoadReg<WriteDiv8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16.Folded>;
+ "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>, SchedLoadReg<WriteDiv32.Folded>, OpSize32;
+ "div{l}\t$src", []>, SchedLoadReg<WriteDiv32>, OpSize32;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", []>, SchedLoadReg<WriteDiv64.Folded>,
+ "div{q}\t$src", []>, SchedLoadReg<WriteDiv64>,
Requires<[In64BitMode]>;
}
@@ -327,16 +327,16 @@ def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8.Folded>;
+ "idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16.Folded>;
+ "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32.Folded>;
+ "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64.Folded>,
+ "idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64>,
Requires<[In64BitMode]>;
}
} // hasSideEffects = 0
@@ -422,22 +422,35 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
} // SchedRW
} // CodeSize
+def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86add_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86sub_flag node:$lhs, node:$rhs), [{
+ // Only use DEC if the arithmetic result itself is used, not just the flags.
+ return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, OpSize16;
+ [(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>,
+ OpSize16;
def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, OpSize32;
+ [(set GR32:$dst, EFLAGS, (X86add_flag_nocf GR32:$src1, 1))]>,
+ OpSize32;
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86add_flag_nocf GR64:$src1, 1))]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
@@ -474,16 +487,18 @@ let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, OpSize16;
+ [(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>,
+ OpSize16;
def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, OpSize32;
+ [(set GR32:$dst, EFLAGS, (X86sub_flag_nocf GR32:$src1, 1))]>,
+ OpSize32;
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86sub_flag_nocf GR64:$src1, 1))]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
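The INC/DEC register forms above now go through the X86add_flag_nocf/X86sub_flag_nocf fragments because INC and DEC leave CF untouched: only an add or subtract by one whose carry output is dead may use them. A rough C++ illustration of the two cases, hedged because actual selection also depends on target tuning and the rest of combining:

#include <cstdint>

// The carry out of this +1 is never observed, so the _nocf guard is
// satisfied and the increment is eligible for INC under these patterns.
int64_t bump(int64_t x) { return x + 1; }

// Here the carry of the +1 is the whole point (unsigned overflow check),
// so hasNoCarryFlagUses fails and the plain ADD form has to be kept.
bool bumpChecked(uint64_t x, uint64_t &out) {
  return __builtin_add_overflow(x, uint64_t{1}, &out);
}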
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
@@ -691,7 +706,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// BinOpRM_F - Instructions like "cmp reg, [mem]".
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -806,8 +821,8 @@ class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMR<opcode, mnemonic, typeinfo,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.RegClass:$src))]>,
- Sched<[WriteALULd, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault, ReadAfterLd]>;
+ Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>;
// BinOpMI - Instructions like "add [mem], imm".
class BinOpMI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -839,7 +854,7 @@ class BinOpMI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMI<opcode, mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.ImmOperator:$src))]>,
- Sched<[WriteALULd]>;
+ Sched<[WriteALU.Folded]>;
// BinOpMI8 - Instructions like "add [mem], imm8".
class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
@@ -872,7 +887,7 @@ class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
: BinOpMI8<mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.Imm8Operator:$src))]>,
- Sched<[WriteALULd]>;
+ Sched<[WriteALU.Folded]>;
// BinOpAI - Instructions like "add %eax, %eax, imm", that imp-def EFLAGS.
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -913,8 +928,8 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR in {
- def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
@@ -931,9 +946,9 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
- def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
-
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
@@ -1176,6 +1191,30 @@ let isCompare = 1 in {
defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
}
+// Patterns to recognize loads on the LHS of an ADC. We can't make X86adc_flag
+// commutable since it has EFLAGS as an input.
+def : Pat<(X86adc_flag (loadi8 addr:$src2), GR8:$src1, EFLAGS),
+ (ADC8rm GR8:$src1, addr:$src2)>;
+def : Pat<(X86adc_flag (loadi16 addr:$src2), GR16:$src1, EFLAGS),
+ (ADC16rm GR16:$src1, addr:$src2)>;
+def : Pat<(X86adc_flag (loadi32 addr:$src2), GR32:$src1, EFLAGS),
+ (ADC32rm GR32:$src1, addr:$src2)>;
+def : Pat<(X86adc_flag (loadi64 addr:$src2), GR64:$src1, EFLAGS),
+ (ADC64rm GR64:$src1, addr:$src2)>;
+
+// Patterns to recognize RMW ADC with loads in operand 1.
+def : Pat<(store (X86adc_flag GR8:$src, (loadi8 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (X86adc_flag GR16:$src, (loadi16 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (X86adc_flag GR32:$src, (loadi32 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC32mr addr:$dst, GR32:$src)>;
+def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC64mr addr:$dst, GR64:$src)>;
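Because X86adc_flag also consumes EFLAGS it cannot be marked commutable, so the explicit swapped-operand patterns above are what let a load feeding the left-hand side of an add-with-carry fold into the ADC memory forms. A hedged sketch of source code that typically produces this shape (multi-word addition where one addend lives in memory):

#include <cstdint>

struct U128 { uint64_t lo, hi; };

// Rough illustration only: the high half is an add-with-carry, and when `a`
// is read from memory those loads are candidates for folding directly into
// the ADC forms matched above (whether that happens depends on the rest of
// selection).
U128 add128(const U128 &a, U128 b) {
  U128 r;
  r.lo = a.lo + b.lo;
  r.hi = a.hi + b.hi + (r.lo < b.lo); // carry from the low-half add
  return r;
}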
//===----------------------------------------------------------------------===//
// Semantically, test instructions are similar to AND, except they don't
@@ -1188,16 +1227,21 @@ def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
let isCompare = 1 in {
let Defs = [EFLAGS] in {
let isCommutable = 1 in {
- def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat>;
- def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat>;
- def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat>;
- def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>;
+ // Avoid selecting these and instead use a test+and. Post processing will
+ // combine them. This gives a bunch of other patterns that start with
+ // 'and' a chance to match.
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , null_frag>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, null_frag>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, null_frag>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, null_frag>;
} // isCommutable
- def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , X86testpat>;
- def TEST16mr : BinOpMR_F<0x84, "test", Xi16, X86testpat>;
- def TEST32mr : BinOpMR_F<0x84, "test", Xi32, X86testpat>;
- def TEST64mr : BinOpMR_F<0x84, "test", Xi64, X86testpat>;
+ let hasSideEffects = 0, mayLoad = 1 in {
+ def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , null_frag>;
+ def TEST16mr : BinOpMR_F<0x84, "test", Xi16, null_frag>;
+ def TEST32mr : BinOpMR_F<0x84, "test", Xi32, null_frag>;
+ def TEST64mr : BinOpMR_F<0x84, "test", Xi64, null_frag>;
+ }
def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
@@ -1235,7 +1279,7 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
- Sched<[WriteALULd, ReadAfterLd]>;
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}
// Complexity is reduced to give and with immediate a chance to match first.
@@ -1275,7 +1319,7 @@ let hasSideEffects = 0 in {
let Predicates = [HasBMI2] in {
let Uses = [EDX] in
- defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul>;
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul32>;
let Uses = [RDX] in
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W;
}
@@ -1283,22 +1327,18 @@ let Predicates = [HasBMI2] in {
//===----------------------------------------------------------------------===//
// ADCX and ADOX Instructions
//
+// We don't have patterns for these as there is no advantage over ADC for
+// most code.
let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
- Constraints = "$src1 = $dst", AddedComplexity = 10 in {
- let SchedRW = [WriteADC] in {
+ Constraints = "$src1 = $dst", hasSideEffects = 0 in {
+ let SchedRW = [WriteADC], isCommutable = 1 in {
def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
- "adcx{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86adc_flag GR32:$src1, GR32:$src2, EFLAGS))]>, T8PD;
+ "adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
def ADCX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
- "adcx{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86adc_flag GR64:$src1, GR64:$src2, EFLAGS))]>, T8PD;
+ "adcx{q}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
- // We don't have patterns for ADOX yet.
- let hasSideEffects = 0 in {
def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"adox{l}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
@@ -1306,26 +1346,17 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
- } // hasSideEffects = 0
} // SchedRW
- let mayLoad = 1, SchedRW = [WriteADCLd, ReadAfterLd] in {
+ let mayLoad = 1, SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold] in {
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
- "adcx{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86adc_flag GR32:$src1, (loadi32 addr:$src2), EFLAGS))]>,
- T8PD;
+ "adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
def ADCX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
- "adcx{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86adc_flag GR64:$src1, (loadi64 addr:$src2), EFLAGS))]>,
- T8PD;
+ "adcx{q}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
- // We don't have patterns for ADOX yet.
- let hasSideEffects = 0 in {
def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"adox{l}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
@@ -1333,6 +1364,5 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
- } // hasSideEffects = 0
- } // mayLoad = 1, SchedRW = [WriteADCLd]
+ } // mayLoad, SchedRW
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index eda4ba5ae6f0..f5494fc0b13f 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -38,7 +38,7 @@ multiclass CMOV<bits<8> opc, string Mnemonic, X86FoldableSchedWrite Sched,
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- SchedRW = [Sched.Folded, ReadAfterLd] in {
+ SchedRW = [Sched.Folded, Sched.ReadAfterFold] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index 373f85020372..394dca8e7817 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -37,11 +37,6 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP, SSP],
def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
"", []>;
-// 64-bit large code model PIC base construction.
-let hasSideEffects = 0, mayLoad = 1, isNotDuplicable = 1, SchedRW = [WriteJump] in
- def MOVGOT64r : PseudoI<(outs GR64:$reg),
- (ins GR64:$scratch, i64i32imm_pcrel:$got), []>;
-
// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
@@ -148,7 +143,7 @@ def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
// These instructions XOR the frame pointer into a GPR. They are used in some
// stack protection schemes. These are post-RA pseudos because we only know the
// frame register after register allocation.
-let Constraints = "$src = $dst", isPseudo = 1, Defs = [EFLAGS] in {
+let Constraints = "$src = $dst", isMoveImm = 1, isPseudo = 1, Defs = [EFLAGS] in {
def XOR32_FP : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src),
"xorl\t$$FP, $src", []>,
Requires<[NotLP64]>, Sched<[WriteALU]>;
@@ -178,7 +173,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
}
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
- isCodeGenOnly = 1, isReturn = 1 in {
+ isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1 in {
def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>;
// CATCHRET needs a custom inserter for SEH.
@@ -240,6 +235,8 @@ let isPseudo = 1, SchedRW = [WriteSystem] in {
"#SEH_SaveXMM $reg, $dst", []>;
def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
"#SEH_StackAlloc $size", []>;
+ def SEH_StackAlign : I<0, Pseudo, (outs), (ins i32imm:$align),
+ "#SEH_StackAlign $align", []>;
def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
"#SEH_SetFrame $reg, $offset", []>;
def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
@@ -273,7 +270,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), "", []>;
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isPseudo = 1, AddedComplexity = 10 in
+ isPseudo = 1, isMoveImm = 1, AddedComplexity = 10 in
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)]>, Sched<[WriteZero]>;
@@ -319,16 +316,14 @@ def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
// that would make it more difficult to rematerialize.
let isReMaterializable = 1, isAsCheapAsAMove = 1,
isPseudo = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
-def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", []>;
+def MOV32ri64 : I<0, Pseudo, (outs GR64:$dst), (ins i64i32imm:$src), "", []>;
// This 64-bit pseudo-move can be used for both a 64-bit constant that is
// actually the zero-extension of a 32-bit constant and for labels in the
// x86-64 small code model.
def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;
-let AddedComplexity = 1 in
-def : Pat<(i64 mov64imm32:$src),
- (SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>;
+def : Pat<(i64 mov64imm32:$src), (MOV32ri64 mov64imm32:$src)>;
// Use sbb to materialize carry bit.
let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
@@ -367,95 +362,109 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
(SETBr)>;
-// (add OP, SETB) -> (adc OP, 0)
-def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
- (ADC8ri GR8:$op, 0)>;
-def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
- (ADC32ri8 GR32:$op, 0)>;
-def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
- (ADC64ri8 GR64:$op, 0)>;
-
-// (sub OP, SETB) -> (sbb OP, 0)
-def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
- (SBB8ri GR8:$op, 0)>;
-def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
- (SBB32ri8 GR32:$op, 0)>;
-def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
- (SBB64ri8 GR64:$op, 0)>;
-
-// (sub OP, SETCC_CARRY) -> (adc OP, 0)
-def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
- (ADC8ri GR8:$op, 0)>;
-def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
- (ADC32ri8 GR32:$op, 0)>;
-def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
- (ADC64ri8 GR64:$op, 0)>;
+// Patterns to give priority when both inputs are zero so that we don't use
+// an immediate for the RHS.
+// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
+def : Pat<(X86sbb_flag (i8 0), (i8 0), EFLAGS),
+ (SBB8rr (EXTRACT_SUBREG (MOV32r0), sub_8bit),
+ (EXTRACT_SUBREG (MOV32r0), sub_8bit))>;
+def : Pat<(X86sbb_flag (i16 0), (i16 0), EFLAGS),
+ (SBB16rr (EXTRACT_SUBREG (MOV32r0), sub_16bit),
+ (EXTRACT_SUBREG (MOV32r0), sub_16bit))>;
+def : Pat<(X86sbb_flag (i32 0), (i32 0), EFLAGS),
+ (SBB32rr (MOV32r0), (MOV32r0))>;
+def : Pat<(X86sbb_flag (i64 0), (i64 0), EFLAGS),
+ (SBB64rr (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit),
+ (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit))>;
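The (X86sbb_flag 0, 0, EFLAGS) node handled above is the usual way of smearing the carry flag across a whole register (all ones when CF is set, zero otherwise); zeroing both operands with MOV32r0 lets the register-register SBB encoding be used instead of one with an immediate right-hand side. A loose C++ example of code that commonly lowers to this compare-then-SBB idiom, with no guarantee of the exact instruction sequence:

#include <cstdint>

// Sketch only: 0 - (a < b) asks for a mask that is all ones when the
// comparison sets the carry flag, the classic cmp + sbb-same-register
// sequence these patterns feed.
uint64_t lessThanMask(uint64_t a, uint64_t b) {
  return 0 - static_cast<uint64_t>(a < b);
}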
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
let SchedRW = [WriteMicrocoded] in {
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP,
- Requires<[Not64BitMode]>;
-def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize16,
- Requires<[Not64BitMode]>;
-def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP, OpSize32,
- Requires<[Not64BitMode]>;
+def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins),
+ "{rep;movsb (%esi), %es:(%edi)|rep movsb es:[edi], [esi]}",
+ [(X86rep_movs i8)]>, REP, AdSize32,
+ Requires<[NotLP64]>;
+def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsw (%esi), %es:(%edi)|rep movsw es:[edi], [esi]}",
+ [(X86rep_movs i16)]>, REP, AdSize32, OpSize16,
+ Requires<[NotLP64]>;
+def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsl (%esi), %es:(%edi)|rep movsd es:[edi], [esi]}",
+ [(X86rep_movs i32)]>, REP, AdSize32, OpSize32,
+ Requires<[NotLP64]>;
+def REP_MOVSQ_32 : RI<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsq (%esi), %es:(%edi)|rep movsq es:[edi], [esi]}",
+ [(X86rep_movs i64)]>, REP, AdSize32,
+ Requires<[NotLP64, In64BitMode]>;
}
let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
-def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP,
- Requires<[In64BitMode]>;
-def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize16,
- Requires<[In64BitMode]>;
-def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP, OpSize32,
- Requires<[In64BitMode]>;
-def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
- [(X86rep_movs i64)]>, REP,
- Requires<[In64BitMode]>;
+def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins),
+ "{rep;movsb (%rsi), %es:(%rdi)|rep movsb es:[rdi], [rsi]}",
+ [(X86rep_movs i8)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
+def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsw (%rsi), %es:(%rdi)|rep movsw es:[rdi], [rsi]}",
+ [(X86rep_movs i16)]>, REP, AdSize64, OpSize16,
+ Requires<[IsLP64]>;
+def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsl (%rsi), %es:(%rdi)|rep movsdi es:[rdi], [rsi]}",
+ [(X86rep_movs i32)]>, REP, AdSize64, OpSize32,
+ Requires<[IsLP64]>;
+def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsq (%rsi), %es:(%rdi)|rep movsq es:[rdi], [rsi]}",
+ [(X86rep_movs i64)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
}
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
let Uses = [AL,ECX,EDI] in
- def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP,
- Requires<[Not64BitMode]>;
+ def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins),
+ "{rep;stosb %al, %es:(%edi)|rep stosb es:[edi], al}",
+ [(X86rep_stos i8)]>, REP, AdSize32,
+ Requires<[NotLP64]>;
let Uses = [AX,ECX,EDI] in
- def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize16,
- Requires<[Not64BitMode]>;
+ def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosw %ax, %es:(%edi)|rep stosw es:[edi], ax}",
+ [(X86rep_stos i16)]>, REP, AdSize32, OpSize16,
+ Requires<[NotLP64]>;
let Uses = [EAX,ECX,EDI] in
- def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP, OpSize32,
- Requires<[Not64BitMode]>;
+ def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosl %eax, %es:(%edi)|rep stosd es:[edi], eax}",
+ [(X86rep_stos i32)]>, REP, AdSize32, OpSize32,
+ Requires<[NotLP64]>;
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSQ_32 : RI<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosq %rax, %es:(%edi)|rep stosq es:[edi], rax}",
+ [(X86rep_stos i64)]>, REP, AdSize32,
+ Requires<[NotLP64, In64BitMode]>;
}
let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
let Uses = [AL,RCX,RDI] in
- def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP,
- Requires<[In64BitMode]>;
+ def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins),
+ "{rep;stosb %al, %es:(%rdi)|rep stosb es:[rdi], al}",
+ [(X86rep_stos i8)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
let Uses = [AX,RCX,RDI] in
- def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize16,
- Requires<[In64BitMode]>;
+ def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosw %ax, %es:(%rdi)|rep stosw es:[rdi], ax}",
+ [(X86rep_stos i16)]>, REP, AdSize64, OpSize16,
+ Requires<[IsLP64]>;
let Uses = [RAX,RCX,RDI] in
- def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP, OpSize32,
- Requires<[In64BitMode]>;
+ def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosl %eax, %es:(%rdi)|rep stosd es:[rdi], eax}",
+ [(X86rep_stos i32)]>, REP, AdSize64, OpSize32,
+ Requires<[IsLP64]>;
let Uses = [RAX,RCX,RDI] in
- def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
- [(X86rep_stos i64)]>, REP,
- Requires<[In64BitMode]>;
+ def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosq %rax, %es:(%rdi)|rep stosq es:[rdi], rax}",
+ [(X86rep_stos i64)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
}
} // SchedRW
@@ -567,22 +576,84 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
- defm _F128 : CMOVrr_PSEUDO<VR128, f128>;
- defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
- defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
- defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
- defm _V8F32 : CMOVrr_PSEUDO<VR256, v8f32>;
- defm _V4F64 : CMOVrr_PSEUDO<VR256, v4f64>;
- defm _V4I64 : CMOVrr_PSEUDO<VR256, v4i64>;
- defm _V8I64 : CMOVrr_PSEUDO<VR512, v8i64>;
- defm _V8F64 : CMOVrr_PSEUDO<VR512, v8f64>;
- defm _V16F32 : CMOVrr_PSEUDO<VR512, v16f32>;
- defm _V8I1 : CMOVrr_PSEUDO<VK8, v8i1>;
- defm _V16I1 : CMOVrr_PSEUDO<VK16, v16i1>;
- defm _V32I1 : CMOVrr_PSEUDO<VK32, v32i1>;
- defm _V64I1 : CMOVrr_PSEUDO<VK64, v64i1>;
+ let Predicates = [NoVLX] in {
+ defm _VR128 : CMOVrr_PSEUDO<VR128, v2i64>;
+ defm _VR256 : CMOVrr_PSEUDO<VR256, v4i64>;
+ }
+ let Predicates = [HasVLX] in {
+ defm _VR128X : CMOVrr_PSEUDO<VR128X, v2i64>;
+ defm _VR256X : CMOVrr_PSEUDO<VR256X, v4i64>;
+ }
+ defm _VR512 : CMOVrr_PSEUDO<VR512, v8i64>;
+ defm _VK2 : CMOVrr_PSEUDO<VK2, v2i1>;
+ defm _VK4 : CMOVrr_PSEUDO<VK4, v4i1>;
+ defm _VK8 : CMOVrr_PSEUDO<VK8, v8i1>;
+ defm _VK16 : CMOVrr_PSEUDO<VK16, v16i1>;
+ defm _VK32 : CMOVrr_PSEUDO<VK32, v32i1>;
+ defm _VK64 : CMOVrr_PSEUDO<VK64, v64i1>;
} // usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS]
+def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+
+let Predicates = [NoVLX] in {
+ def : Pat<(v16i8 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v8i16 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v4i32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+
+ def : Pat<(v32i8 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v16i16 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v8i32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+}
+let Predicates = [HasVLX] in {
+ def : Pat<(v16i8 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v2f64 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+
+ def : Pat<(v32i8 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v4f64 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+}
+
+def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+
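The reorganised CMOV_* pseudos and the per-type patterns above exist because x86 has no conditional move over vector or mask registers; the pseudo carries the select through instruction selection and a custom inserter later expands it into control flow. An illustrative source-level shape, hedged because vectorisation choices and target features decide whether this path is actually taken:

#include <immintrin.h>

// A whole-vector select on a scalar condition: there is no single vector
// CMOV instruction, so this can become one of the CMOV_VR128* pseudos and
// may later be rewritten into a compare-and-branch diamond by the custom
// inserter.
__m128i pick(bool flag, __m128i a, __m128i b) {
  return flag ? a : b;
}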
//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -591,12 +662,11 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
// Memory barriers
-// TODO: Get this to fold the constant into the instruction.
let isCodeGenOnly = 1, Defs = [EFLAGS] in
-def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
- "or{l}\t{$zero, $dst|$dst, $zero}", []>,
- Requires<[Not64BitMode]>, OpSize32, LOCK,
- Sched<[WriteALULd, WriteRMW]>;
+def OR32mi8Locked : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$zero),
+ "or{l}\t{$zero, $dst|$dst, $zero}", []>,
+ Requires<[Not64BitMode]>, OpSize32, LOCK,
+ Sched<[WriteALURMW]>;
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
@@ -610,7 +680,7 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
Format ImmMod, SDNode Op, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW] in {
+ SchedRW = [WriteALURMW] in {
def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
@@ -706,53 +776,64 @@ defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;
defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
-multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
- string frag, string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW] in {
-def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
- !strconcat(mnemonic, "{b}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_8") addr:$dst))]>,
- LOCK;
-def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
- !strconcat(mnemonic, "{w}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_16") addr:$dst))]>,
- OpSize16, LOCK;
-def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
- !strconcat(mnemonic, "{l}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_32") addr:$dst))]>,
- OpSize32, LOCK;
-def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
- !strconcat(mnemonic, "{q}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_64") addr:$dst))]>,
- LOCK;
-}
-}
+def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_add node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
-multiclass unary_atomic_intrin<SDNode atomic_op> {
- def _8 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
- }]>;
- def _16 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
- }]>;
- def _32 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
- }]>;
- def _64 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
- }]>;
-}
+def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_sub node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
-defm X86lock_inc : unary_atomic_intrin<X86lock_inc>;
-defm X86lock_dec : unary_atomic_intrin<X86lock_dec>;
+let Predicates = [UseIncDec] in {
+ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALURMW] in {
+ def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "inc{b}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>,
+ LOCK;
+ def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "inc{w}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i16 1)))]>,
+ OpSize16, LOCK;
+ def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "inc{l}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>,
+ OpSize32, LOCK;
+ def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "inc{q}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>,
+ LOCK;
+
+ def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "dec{b}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i8 1)))]>,
+ LOCK;
+ def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "dec{w}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i16 1)))]>,
+ OpSize16, LOCK;
+ def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "dec{l}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>,
+ OpSize32, LOCK;
+ def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "dec{q}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>,
+ LOCK;
+ }
-defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "X86lock_inc", "inc">;
-defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "X86lock_dec", "dec">;
+ // Additional patterns for -1 constant.
+ def : Pat<(X86lock_add addr:$dst, (i8 -1)), (LOCK_DEC8m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
+}
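LOCK INC and LOCK DEC, like their unlocked counterparts, do not update CF, which is why the X86lock_add_nocf/X86lock_sub_nocf wrappers gate these forms and why the whole group sits under UseIncDec. A hedged sketch of the kind of atomic update that can take this path when its return value is discarded:

#include <atomic>

// Illustrative only: an atomic add/sub of 1 whose result is unused is the
// kind of DAG that reaches the X86lock_add/X86lock_sub nodes matched above;
// with UseIncDec and no carry-flag consumer it may be emitted as
// `lock inc` / `lock dec` on the memory operand.
void retain(std::atomic<long> &counter) {
  counter.fetch_add(1, std::memory_order_relaxed);
}
void release(std::atomic<long> &counter) {
  counter.fetch_sub(1, std::memory_order_release);
}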
// Atomic compare and swap.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
@@ -766,7 +847,7 @@ let isCodeGenOnly = 1, usesCustomInserter = 1 in {
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag> {
-let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
!strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
@@ -787,7 +868,7 @@ let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
- SchedRW = [WriteALULd, WriteRMW] in {
+ SchedRW = [WriteCMPXCHGRMW] in {
defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
}
@@ -811,7 +892,7 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
// the instruction and we are sure we will have a valid register to restore
// the value of RBX.
let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],
- SchedRW = [WriteALULd, WriteRMW], isCodeGenOnly = 1, isPseudo = 1,
+ SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1,
Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in {
def LCMPXCHG8B_SAVE_EBX :
I<0, Pseudo, (outs GR32:$dst),
@@ -823,14 +904,14 @@ def LCMPXCHG8B_SAVE_EBX :
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
- Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
+ Predicates = [HasCmpxchg16b], SchedRW = [WriteCMPXCHGRMW] in {
defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
X86cas16, i128mem>, REX_W;
}
// Same as LCMPXCHG8B_SAVE_RBX but for the 16 Bytes variant.
let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
- Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW],
+ Predicates = [HasCmpxchg16b], SchedRW = [WriteCMPXCHGRMW],
isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",
usesCustomInserter = 1 in {
def LCMPXCHG16B_SAVE_RBX :
@@ -847,7 +928,7 @@ defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", X86cas>;
multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
string frag> {
let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW] in {
+ SchedRW = [WriteALURMW] in {
def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
(ins GR8:$val, i8mem:$ptr),
!strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
@@ -887,42 +968,38 @@ defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add">, TB, LOCK;
* extremely late to prevent them from being accidentally reordered in the backend
* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
*/
-multiclass RELEASE_BINOP_MI<SDNode op> {
- def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
- "#BINOP "#NAME#"8mi PSEUDO!",
- [(atomic_store_8 addr:$dst, (op
- (atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
- def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src),
- "#BINOP "#NAME#"8mr PSEUDO!",
- [(atomic_store_8 addr:$dst, (op
- (atomic_load_8 addr:$dst), GR8:$src))]>;
- // NAME#16 is not generated as 16-bit arithmetic instructions are considered
- // costly and avoided as far as possible by this backend anyway
- def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
- "#BINOP "#NAME#"32mi PSEUDO!",
- [(atomic_store_32 addr:$dst, (op
- (atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
- def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
- "#BINOP "#NAME#"32mr PSEUDO!",
- [(atomic_store_32 addr:$dst, (op
- (atomic_load_32 addr:$dst), GR32:$src))]>;
- def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "#BINOP "#NAME#"64mi32 PSEUDO!",
- [(atomic_store_64 addr:$dst, (op
- (atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
- def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
- "#BINOP "#NAME#"64mr PSEUDO!",
- [(atomic_store_64 addr:$dst, (op
- (atomic_load_64 addr:$dst), GR64:$src))]>;
-}
-let Defs = [EFLAGS], SchedRW = [WriteMicrocoded] in {
- defm RELEASE_ADD : RELEASE_BINOP_MI<add>;
- defm RELEASE_AND : RELEASE_BINOP_MI<and>;
- defm RELEASE_OR : RELEASE_BINOP_MI<or>;
- defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;
- // Note: we don't deal with sub, because substractions of constants are
- // optimized into additions before this code can run.
+multiclass RELEASE_BINOP_MI<string Name, SDNode op> {
+ def : Pat<(atomic_store_8 addr:$dst,
+ (op (atomic_load_8 addr:$dst), (i8 imm:$src))),
+ (!cast<Instruction>(Name#"8mi") addr:$dst, imm:$src)>;
+ def : Pat<(atomic_store_16 addr:$dst,
+ (op (atomic_load_16 addr:$dst), (i16 imm:$src))),
+ (!cast<Instruction>(Name#"16mi") addr:$dst, imm:$src)>;
+ def : Pat<(atomic_store_32 addr:$dst,
+ (op (atomic_load_32 addr:$dst), (i32 imm:$src))),
+ (!cast<Instruction>(Name#"32mi") addr:$dst, imm:$src)>;
+ def : Pat<(atomic_store_64 addr:$dst,
+ (op (atomic_load_64 addr:$dst), (i64immSExt32:$src))),
+ (!cast<Instruction>(Name#"64mi32") addr:$dst, (i64immSExt32:$src))>;
+
+ def : Pat<(atomic_store_8 addr:$dst,
+ (op (atomic_load_8 addr:$dst), (i8 GR8:$src))),
+ (!cast<Instruction>(Name#"8mr") addr:$dst, GR8:$src)>;
+ def : Pat<(atomic_store_16 addr:$dst,
+ (op (atomic_load_16 addr:$dst), (i16 GR16:$src))),
+ (!cast<Instruction>(Name#"16mr") addr:$dst, GR16:$src)>;
+ def : Pat<(atomic_store_32 addr:$dst,
+ (op (atomic_load_32 addr:$dst), (i32 GR32:$src))),
+ (!cast<Instruction>(Name#"32mr") addr:$dst, GR32:$src)>;
+ def : Pat<(atomic_store_64 addr:$dst,
+ (op (atomic_load_64 addr:$dst), (i64 GR64:$src))),
+ (!cast<Instruction>(Name#"64mr") addr:$dst, GR64:$src)>;
}
+defm : RELEASE_BINOP_MI<"ADD", add>;
+defm : RELEASE_BINOP_MI<"AND", and>;
+defm : RELEASE_BINOP_MI<"OR", or>;
+defm : RELEASE_BINOP_MI<"XOR", xor>;
+defm : RELEASE_BINOP_MI<"SUB", sub>;
// Same as above, but for floating-point.
// FIXME: imm version.
@@ -947,91 +1024,64 @@ defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
// FIXME: Add fsub, fmul, fdiv, ...
}
-multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
- def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
- "#UNOP "#NAME#"8m PSEUDO!",
- [(atomic_store_8 addr:$dst, dag8)]>;
- def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
- "#UNOP "#NAME#"16m PSEUDO!",
- [(atomic_store_16 addr:$dst, dag16)]>;
- def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
- "#UNOP "#NAME#"32m PSEUDO!",
- [(atomic_store_32 addr:$dst, dag32)]>;
- def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
- "#UNOP "#NAME#"64m PSEUDO!",
- [(atomic_store_64 addr:$dst, dag64)]>;
+multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
+ dag dag64> {
+ def : Pat<(atomic_store_8 addr:$dst, dag8),
+ (!cast<Instruction>(Name#8m) addr:$dst)>;
+ def : Pat<(atomic_store_16 addr:$dst, dag16),
+ (!cast<Instruction>(Name#16m) addr:$dst)>;
+ def : Pat<(atomic_store_32 addr:$dst, dag32),
+ (!cast<Instruction>(Name#32m) addr:$dst)>;
+ def : Pat<(atomic_store_64 addr:$dst, dag64),
+ (!cast<Instruction>(Name#64m) addr:$dst)>;
}
-let Defs = [EFLAGS], Predicates = [UseIncDec], SchedRW = [WriteMicrocoded] in {
- defm RELEASE_INC : RELEASE_UNOP<
+let Predicates = [UseIncDec] in {
+ defm : RELEASE_UNOP<"INC",
(add (atomic_load_8 addr:$dst), (i8 1)),
(add (atomic_load_16 addr:$dst), (i16 1)),
(add (atomic_load_32 addr:$dst), (i32 1)),
(add (atomic_load_64 addr:$dst), (i64 1))>;
- defm RELEASE_DEC : RELEASE_UNOP<
+ defm : RELEASE_UNOP<"DEC",
(add (atomic_load_8 addr:$dst), (i8 -1)),
(add (atomic_load_16 addr:$dst), (i16 -1)),
(add (atomic_load_32 addr:$dst), (i32 -1)),
(add (atomic_load_64 addr:$dst), (i64 -1))>;
}
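// Similarly, a minimal illustrative sketch for the INC/DEC unary patterns
// above: an increment written as separate atomic load and store (not a
// fetch_add, so no lock prefix is needed), which can become a single
// memory-destination inc when the target prefers INC/DEC. The function name
// is hypothetical.

#include <atomic>

void bump(std::atomic<long> &counter) {
  // Read-modify-write spelled as an atomic load, add of 1, and atomic store.
  counter.store(counter.load(std::memory_order_relaxed) + 1,
                std::memory_order_release);
}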
-/*
-TODO: These don't work because the type inference of TableGen fails.
-TODO: find a way to fix it.
-let Defs = [EFLAGS] in {
- defm RELEASE_NEG : RELEASE_UNOP<
- (ineg (atomic_load_8 addr:$dst)),
- (ineg (atomic_load_16 addr:$dst)),
- (ineg (atomic_load_32 addr:$dst)),
- (ineg (atomic_load_64 addr:$dst))>;
-}
-// NOT doesn't set flags.
-defm RELEASE_NOT : RELEASE_UNOP<
- (not (atomic_load_8 addr:$dst)),
- (not (atomic_load_16 addr:$dst)),
- (not (atomic_load_32 addr:$dst)),
- (not (atomic_load_64 addr:$dst))>;
-*/
-let SchedRW = [WriteMicrocoded] in {
-def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
- "#RELEASE_MOV8mi PSEUDO!",
- [(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
-def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
- "#RELEASE_MOV16mi PSEUDO!",
- [(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
-def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
- "#RELEASE_MOV32mi PSEUDO!",
- [(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
-def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "#RELEASE_MOV64mi32 PSEUDO!",
- [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
-
-def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
- "#RELEASE_MOV8mr PSEUDO!",
- [(atomic_store_8 addr:$dst, GR8 :$src)]>;
-def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
- "#RELEASE_MOV16mr PSEUDO!",
- [(atomic_store_16 addr:$dst, GR16:$src)]>;
-def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
- "#RELEASE_MOV32mr PSEUDO!",
- [(atomic_store_32 addr:$dst, GR32:$src)]>;
-def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
- "#RELEASE_MOV64mr PSEUDO!",
- [(atomic_store_64 addr:$dst, GR64:$src)]>;
-
-def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
- "#ACQUIRE_MOV8rm PSEUDO!",
- [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
-def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
- "#ACQUIRE_MOV16rm PSEUDO!",
- [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
-def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
- "#ACQUIRE_MOV32rm PSEUDO!",
- [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
-def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
- "#ACQUIRE_MOV64rm PSEUDO!",
- [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
-} // SchedRW
+defm : RELEASE_UNOP<"NEG",
+ (ineg (i8 (atomic_load_8 addr:$dst))),
+ (ineg (i16 (atomic_load_16 addr:$dst))),
+ (ineg (i32 (atomic_load_32 addr:$dst))),
+ (ineg (i64 (atomic_load_64 addr:$dst)))>;
+defm : RELEASE_UNOP<"NOT",
+ (not (i8 (atomic_load_8 addr:$dst))),
+ (not (i16 (atomic_load_16 addr:$dst))),
+ (not (i32 (atomic_load_32 addr:$dst))),
+ (not (i64 (atomic_load_64 addr:$dst)))>;
+
+def : Pat<(atomic_store_8 addr:$dst, (i8 imm:$src)),
+ (MOV8mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_16 addr:$dst, (i16 imm:$src)),
+ (MOV16mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_32 addr:$dst, (i32 imm:$src)),
+ (MOV32mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_64 addr:$dst, (i64immSExt32:$src)),
+ (MOV64mi32 addr:$dst, i64immSExt32:$src)>;
+
+def : Pat<(atomic_store_8 addr:$dst, GR8:$src),
+ (MOV8mr addr:$dst, GR8:$src)>;
+def : Pat<(atomic_store_16 addr:$dst, GR16:$src),
+ (MOV16mr addr:$dst, GR16:$src)>;
+def : Pat<(atomic_store_32 addr:$dst, GR32:$src),
+ (MOV32mr addr:$dst, GR32:$src)>;
+def : Pat<(atomic_store_64 addr:$dst, GR64:$src),
+ (MOV64mr addr:$dst, GR64:$src)>;
+
+def : Pat<(i8 (atomic_load_8 addr:$src)), (MOV8rm addr:$src)>;
+def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
+def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
+def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
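// A minimal illustrative sketch of the plain atomic loads and stores covered
// by the MOV patterns above: on x86, atomic loads and release (or weaker)
// stores need no extra fencing, so they can select ordinary MOVs. Function
// names are hypothetical.

#include <atomic>

int load_acquire(const std::atomic<int> &a) {
  return a.load(std::memory_order_acquire);   // expected to use a plain MOV load
}

void store_release(std::atomic<int> &a, int v) {
  a.store(v, std::memory_order_release);      // expected to use a plain MOV store
}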
//===----------------------------------------------------------------------===//
// DAG Pattern Matching Rules
@@ -1041,12 +1091,12 @@ def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
// binary size compared to a regular MOV, but it introduces an unnecessary
// load, so is not suitable for regular or optsize functions.
let Predicates = [OptForMinSize] in {
-def : Pat<(store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
-def : Pat<(store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
-def : Pat<(store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
-def : Pat<(store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
-def : Pat<(store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
-def : Pat<(store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
+def : Pat<(nonvolatile_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
+def : Pat<(nonvolatile_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
+def : Pat<(nonvolatile_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
+def : Pat<(nonvolatile_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
+def : Pat<(nonvolatile_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
+def : Pat<(nonvolatile_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
}
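// A minimal illustrative sketch of the stores the OptForMinSize patterns
// above shrink: writing 0 or -1 through a plain (non-volatile) pointer, which
// can be encoded as an and/or with a sign-extended 8-bit immediate instead of
// a mov with a full-width immediate. Function names are hypothetical.

void clear_word(int *p) { *p = 0;  }  // may become: andl $0, (%rdi)
void set_word(int *p)   { *p = -1; }  // may become: orl $-1, (%rdi)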
// In kernel code model, we can get the address of a label
@@ -1128,14 +1178,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, NotUseRetpoline]>;
+ Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[Not64BitMode, IsNotPIC, NotUseRetpoline]>;
+ Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
@@ -1147,21 +1197,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpoline]>;
+ Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpoline]>;
+ Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, UseRetpoline]>;
+ Requires<[In64BitMode, UseRetpolineIndirectCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, UseRetpoline]>;
+ Requires<[Not64BitMode, UseRetpolineIndirectCalls]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
@@ -1226,7 +1276,8 @@ defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
// i1 stored in one byte in zero-extended form.
// Upper bits cleanup should be executed before Store.
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(zextloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
@@ -1237,9 +1288,11 @@ def : Pat<(zextloadi64i1 addr:$src),
// defined, avoiding partial-register updates.
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
@@ -1271,6 +1324,15 @@ def : Pat<(i64 (anyext GR16:$src)),
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, sub_32bit)>;
+// If this is an anyext of the remainder of an 8-bit sdivrem, use a MOVSX
+// instead of a MOVZX. The sdivrem lowering will emit a MOVSX to move
+// %ah to the lower byte of a register. By using a MOVSX here we allow a
+// post-isel peephole to merge the two MOVSX instructions into one.
+def anyext_sdiv : PatFrag<(ops node:$lhs), (anyext node:$lhs),[{
+ return (N->getOperand(0).getOpcode() == ISD::SDIVREM &&
+ N->getOperand(0).getResNo() == 1);
+}]>;
+def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>;
// Any instruction that defines a 32-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
@@ -1305,17 +1367,15 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- KnownBits Known0;
- CurDAG->computeKnownBits(N->getOperand(0), Known0, 0);
- KnownBits Known1;
- CurDAG->computeKnownBits(N->getOperand(1), Known1, 0);
+ KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
+ KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
return (~Known0.Zero & ~Known1.Zero) == 0;
}]>;
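// A small stand-alone illustration of the disjointness test used by
// or_is_add above: an OR may be rewritten as an ADD when no bit position can
// be one in both operands, i.e. the complements of the known-zero masks do
// not overlap. The helper only mirrors that check; the names are hypothetical.

#include <cstdint>

bool orLooksLikeAdd(uint64_t KnownZero0, uint64_t KnownZero1) {
  // Bits that might be one in each operand are the complements of the
  // known-zero masks; OR equals ADD when those sets are disjoint.
  return (~KnownZero0 & ~KnownZero1) == 0;
}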
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
// Try this before the selecting to OR.
-let AddedComplexity = 5, SchedRW = [WriteALU] in {
+let SchedRW = [WriteALU] in {
let isConvertibleToThreeAddress = 1,
Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
@@ -1375,8 +1435,7 @@ def ADD64ri32_DB : I<0, Pseudo,
def sub_is_xor : PatFrag<(ops node:$lhs, node:$rhs), (sub node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- KnownBits Known;
- CurDAG->computeKnownBits(N->getOperand(1), Known);
+ KnownBits Known = CurDAG->computeKnownBits(N->getOperand(1));
// If all possible ones in the RHS are set in the LHS then there can't be
// a borrow and we can use xor.
@@ -1973,6 +2032,15 @@ let Predicates = [UseIncDec] in {
def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+ def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>;
+ def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>;
+ def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>;
+ def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>;
+ def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>;
+ def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>;
+ def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>;
+ def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>;
}
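// A minimal illustrative example for the flag-producing add/sub of -1 handled
// just above: the comparison consumes only ZF, never CF, so INC/DEC (which
// leave CF untouched) are eligible. The function name is hypothetical.

bool dec_and_test(unsigned &n) {
  // n - 1 is canonicalized to n + (-1); only ZF of the result is consumed,
  // so a DEC, which preserves CF, can produce the flags.
  return --n == 0;
}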
// or reg/reg.
@@ -2081,23 +2149,3 @@ def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
let Predicates = [HasMOVBE] in {
def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
}
-
-// These patterns are selected by some custom code in X86ISelDAGToDAG.cpp that
-// custom combines and+srl into BEXTR. We use these patterns to avoid a bunch
-// of manual code for folding loads.
-let Predicates = [HasBMI, NoTBM] in {
- def : Pat<(X86bextr GR32:$src1, (i32 imm:$src2)),
- (BEXTR32rr GR32:$src1, (MOV32ri imm:$src2))>;
- def : Pat<(X86bextr (loadi32 addr:$src1), (i32 imm:$src2)),
- (BEXTR32rm addr:$src1, (MOV32ri imm:$src2))>;
- def : Pat<(X86bextr GR64:$src1, mov64imm32:$src2),
- (BEXTR64rr GR64:$src1,
- (SUBREG_TO_REG (i64 0),
- (MOV32ri64 mov64imm32:$src2),
- sub_32bit))>;
- def : Pat<(X86bextr (loadi64 addr:$src1), mov64imm32:$src2),
- (BEXTR64rm addr:$src1,
- (SUBREG_TO_REG (i64 0),
- (MOV32ri64 mov64imm32:$src2),
- sub_32bit))>;
-} // HasBMI, NoTBM
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
index 650bce74dcf2..a7c7aaab2285 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -222,11 +222,13 @@ let isCall = 1 in
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
- Requires<[Not64BitMode,NotUseRetpoline]>, Sched<[WriteJump]>;
+ Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>,
+ Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
- Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>,
+ Requires<[Not64BitMode,FavorMemIndirectCall,
+ NotUseRetpolineIndirectCalls]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT, use with caution.
@@ -320,11 +322,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,NotUseRetpoline]>;
+ Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
- NotUseRetpoline]>;
+ NotUseRetpolineIndirectCalls]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
@@ -379,11 +381,11 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
SchedRW = [WriteJump] in {
def RETPOLINE_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
- Requires<[Not64BitMode,UseRetpoline]>;
+ Requires<[Not64BitMode,UseRetpolineIndirectCalls]>;
def RETPOLINE_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,UseRetpoline]>;
+ Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
// Retpoline variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
index 421792c5599f..c24d6d5b8df1 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -163,6 +163,26 @@ def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
[(set GR64:$dst, (sextloadi64i32 addr:$src))]>,
Sched<[WriteALULd]>, Requires<[In64BitMode]>;
+// These instructions exist as a consequence of operand size prefix having
+// control of the destination size, but not the input size. Only support them
+// for the disassembler.
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
+def MOVSX16rr32: I<0x63, MRMSrcReg, (outs GR16:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALU]>, OpSize16, Requires<[In64BitMode]>;
+def MOVSX32rr32: I<0x63, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALU]>, OpSize32, Requires<[In64BitMode]>;
+let mayLoad = 1 in {
+def MOVSX16rm32: I<0x63, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALULd]>, OpSize16, Requires<[In64BitMode]>;
+def MOVSX32rm32: I<0x63, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALULd]>, OpSize32, Requires<[In64BitMode]>;
+} // mayLoad = 1
+} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0
+
// movzbq and movzwq encodings for the disassembler
let hasSideEffects = 0 in {
def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
index a559f62c8f38..1a8e529431af 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -51,7 +51,7 @@ multiclass fma3p_rm_213<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1,
(MemFrag addr:$src3))))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -71,7 +71,7 @@ multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3),
RC:$src1)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -93,7 +93,7 @@ multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1,
RC:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
@@ -192,7 +192,7 @@ multiclass fma3s_rm_213<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
@@ -212,7 +212,7 @@ multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, (load addr:$src3), RC:$src1))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
@@ -234,7 +234,7 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode (load addr:$src3), RC:$src1, RC:$src2))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
@@ -279,7 +279,7 @@ multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
(ins RC:$src1, RC:$src2, memopr:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
// The FMA 213 form is created for lowering of scalar FMA intrinsics
@@ -402,19 +402,19 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst),
@@ -438,19 +438,19 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
let mayLoad = 1 in
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>,
- VEX_LIG, Sched<[sched.Folded, ReadAfterLd,
+ VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold,
// memop:$src2
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
// VR128::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -477,19 +477,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W,
- Sched<[sched.XMM.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold, sched.XMM.ReadAfterFold]>;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>,
- Sched<[sched.XMM.Folded, ReadAfterLd,
+ Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold,
// f128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128::$src3
- ReadAfterLd]>;
+ sched.XMM.ReadAfterFold]>;
let isCommutable = 1 in
def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
@@ -504,19 +504,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
(ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
- Sched<[sched.YMM.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold, sched.YMM.ReadAfterFold]>;
def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L,
- Sched<[sched.YMM.Folded, ReadAfterLd,
+ Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold,
// f256mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR256::$src3
- ReadAfterLd]>;
+ sched.YMM.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
index cc81a919ec99..5912a3199613 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -592,10 +592,13 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
+let Predicates = [FPStackf32, HasCMov] in
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
+let Predicates = [FPStackf64, HasCMov] in
def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
+let Predicates = [HasCMov] in
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 739275907978..11a27ba90586 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -75,7 +75,7 @@ def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, i8>,
SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<1,2>, SDTCisInt<3>]>>;
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -103,23 +103,22 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86vzext : SDNode<"X86ISD::VZEXT",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<1, 0>]>>;
-
-def X86vsext : SDNode<"X86ISD::VSEXT",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<1, 0>]>>;
-
def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<0, 1>]>;
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisOpSmallerThanOp<0, 1>]>;
+def SDTVmtrunc : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisOpSmallerThanOp<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>;
def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>;
def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>;
def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>;
+def X86vmtrunc : SDNode<"X86ISD::VMTRUNC", SDTVmtrunc>;
+def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>;
+def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
@@ -144,6 +143,14 @@ def X86fpextRnd : SDNode<"X86ISD::VFPEXTS_RND",
SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>;
+def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
+ SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCisSameSizeAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>>;
+
def X86vshiftimm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVT<2, i8>, SDTCisInt<0>]>;
@@ -182,7 +189,7 @@ def X86phminpos: SDNode<"X86ISD::PHMINPOS",
def X86vshiftuniform : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>, SDTCisInt<0>,
- SDTCisInt<1>]>;
+ SDTCisInt<2>]>;
def X86vshl : SDNode<"X86ISD::VSHL", X86vshiftuniform>;
def X86vsrl : SDNode<"X86ISD::VSRL", X86vshiftuniform>;
@@ -237,10 +244,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
-def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
-def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
-def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
-def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
@@ -535,6 +538,8 @@ def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisFP<1>,
SDTCisVT<2, i32>]>;
+def SDTSFloatToInt: SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisFP<1>,
+ SDTCisVec<1>]>;
def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>,
SDTCisVec<1>, SDTCisVT<2, i32>]>;
@@ -548,11 +553,15 @@ def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
def X86SintToFpRnd : SDNode<"X86ISD::SCALAR_SINT_TO_FP_RND", SDTintToFPRound>;
def X86UintToFpRnd : SDNode<"X86ISD::SCALAR_UINT_TO_FP_RND", SDTintToFPRound>;
+def X86cvtts2Int : SDNode<"X86ISD::CVTTS2SI", SDTSFloatToInt>;
+def X86cvtts2UInt : SDNode<"X86ISD::CVTTS2UI", SDTSFloatToInt>;
def X86cvtts2IntRnd : SDNode<"X86ISD::CVTTS2SI_RND", SDTSFloatToIntRnd>;
def X86cvtts2UIntRnd : SDNode<"X86ISD::CVTTS2UI_RND", SDTSFloatToIntRnd>;
-def X86cvts2si : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>;
-def X86cvts2usi : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
+def X86cvts2si : SDNode<"X86ISD::CVTS2SI", SDTSFloatToInt>;
+def X86cvts2usi : SDNode<"X86ISD::CVTS2UI", SDTSFloatToInt>;
+def X86cvts2siRnd : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>;
+def X86cvts2usiRnd : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
// Vector with rounding mode
@@ -581,6 +590,19 @@ def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
+def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisFP<1>,
+ SDTCisSameSizeAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>;
+
+def X86mcvtp2Int : SDNode<"X86ISD::MCVTP2SI", SDTMFloatToInt>;
+def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
+def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
+def X86mcvttp2ui : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>;
+
+
def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>]> >;
@@ -594,6 +616,13 @@ def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
SDTCisVT<2, i32>]> >;
+def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
+ SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisVT<2, i32>,
+ SDTCisSameAs<0, 3>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisSameNumEltsAs<1, 4>]> >;
def X86vfpextRnd : SDNode<"X86ISD::VFPEXT_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>,
@@ -641,28 +670,29 @@ def sdmem : Operand<v2f64> {
// SSE pattern fragments
//===----------------------------------------------------------------------===//
-// Vector load wrappers to prevent folding of non-temporal aligned loads on
-// supporting targets.
-def vecload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return !useNonTemporalLoad(cast<LoadSDNode>(N));
-}]>;
-
// 128-bit load pattern fragments
-// NOTE: all 128-bit integer vector loads are promoted to v2i64
-def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vecload node:$ptr))>;
-def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vecload node:$ptr))>;
-def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vecload node:$ptr))>;
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
+def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
// 256-bit load pattern fragments
-// NOTE: all 256-bit integer vector loads are promoted to v4i64
-def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vecload node:$ptr))>;
-def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vecload node:$ptr))>;
-def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vecload node:$ptr))>;
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
+def loadv16i16 : PatFrag<(ops node:$ptr), (v16i16 (load node:$ptr))>;
+def loadv32i8 : PatFrag<(ops node:$ptr), (v32i8 (load node:$ptr))>;
// 512-bit load pattern fragments
-def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vecload node:$ptr))>;
-def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vecload node:$ptr))>;
-def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vecload node:$ptr))>;
+def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
+def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
+def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
+def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
+def loadv32i16 : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
+def loadv64i8 : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
// 128-/256-/512-bit extload pattern fragments
def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
@@ -676,46 +706,63 @@ def alignedstore : PatFrag<(ops node:$val, node:$ptr),
return St->getAlignment() >= St->getMemoryVT().getStoreSize();
}]>;
-// Like 'load', but always requires 128-bit vector alignment.
-def alignedvecload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+// Like 'load', but always requires vector size alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
auto *Ld = cast<LoadSDNode>(N);
- return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize() &&
- !useNonTemporalLoad(cast<LoadSDNode>(N));
+ return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
}]>;
// 128-bit aligned load pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
- (v4f32 (alignedvecload node:$ptr))>;
+ (v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
- (v2f64 (alignedvecload node:$ptr))>;
+ (v2f64 (alignedload node:$ptr))>;
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
- (v2i64 (alignedvecload node:$ptr))>;
+ (v2i64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+ (v4i32 (alignedload node:$ptr))>;
+def alignedloadv8i16 : PatFrag<(ops node:$ptr),
+ (v8i16 (alignedload node:$ptr))>;
+def alignedloadv16i8 : PatFrag<(ops node:$ptr),
+ (v16i8 (alignedload node:$ptr))>;
// 256-bit aligned load pattern fragments
// NOTE: all 256-bit integer vector loads are promoted to v4i64
-def alignedloadv8f32 : PatFrag<(ops node:$ptr),
- (v8f32 (alignedvecload node:$ptr))>;
-def alignedloadv4f64 : PatFrag<(ops node:$ptr),
- (v4f64 (alignedvecload node:$ptr))>;
-def alignedloadv4i64 : PatFrag<(ops node:$ptr),
- (v4i64 (alignedvecload node:$ptr))>;
+def alignedloadv8f32 : PatFrag<(ops node:$ptr),
+ (v8f32 (alignedload node:$ptr))>;
+def alignedloadv4f64 : PatFrag<(ops node:$ptr),
+ (v4f64 (alignedload node:$ptr))>;
+def alignedloadv4i64 : PatFrag<(ops node:$ptr),
+ (v4i64 (alignedload node:$ptr))>;
+def alignedloadv8i32 : PatFrag<(ops node:$ptr),
+ (v8i32 (alignedload node:$ptr))>;
+def alignedloadv16i16 : PatFrag<(ops node:$ptr),
+ (v16i16 (alignedload node:$ptr))>;
+def alignedloadv32i8 : PatFrag<(ops node:$ptr),
+ (v32i8 (alignedload node:$ptr))>;
// 512-bit aligned load pattern fragments
def alignedloadv16f32 : PatFrag<(ops node:$ptr),
- (v16f32 (alignedvecload node:$ptr))>;
+ (v16f32 (alignedload node:$ptr))>;
def alignedloadv8f64 : PatFrag<(ops node:$ptr),
- (v8f64 (alignedvecload node:$ptr))>;
+ (v8f64 (alignedload node:$ptr))>;
def alignedloadv8i64 : PatFrag<(ops node:$ptr),
- (v8i64 (alignedvecload node:$ptr))>;
-
-// Like 'vecload', but uses special alignment checks suitable for use in
+ (v8i64 (alignedload node:$ptr))>;
+def alignedloadv16i32 : PatFrag<(ops node:$ptr),
+ (v16i32 (alignedload node:$ptr))>;
+def alignedloadv32i16 : PatFrag<(ops node:$ptr),
+ (v32i16 (alignedload node:$ptr))>;
+def alignedloadv64i8 : PatFrag<(ops node:$ptr),
+ (v64i8 (alignedload node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
// be naturally aligned on some targets but not on others. If the subtarget
// allows unaligned accesses, match any load, though this may require
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
-def memop : PatFrag<(ops node:$ptr), (vecload node:$ptr), [{
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
auto *Ld = cast<LoadSDNode>(N);
return Subtarget->hasSSEUnalignedMem() ||
Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
@@ -726,6 +773,9 @@ def memop : PatFrag<(ops node:$ptr), (vecload node:$ptr), [{
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
def X86masked_gather : SDNode<"X86ISD::MGATHER",
SDTypeProfile<2, 3, [SDTCisVec<0>,
@@ -828,6 +878,7 @@ def bc_v4f64 : PatFrag<(ops node:$in), (v4f64 (bitconvert node:$in))>;
// 512-bit bitconvert pattern fragments
def bc_v64i8 : PatFrag<(ops node:$in), (v64i8 (bitconvert node:$in))>;
+def bc_v32i16 : PatFrag<(ops node:$in), (v32i16 (bitconvert node:$in))>;
def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index e56452362168..ab14ee7fadf2 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -411,8 +411,13 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- const MachineMemOperand *Dummy;
- return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (hasLoadFromStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return 1;
+ }
}
return 0;
}
@@ -441,8 +446,13 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- const MachineMemOperand *Dummy;
- return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (hasStoreToStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return 1;
+ }
}
return 0;
}
@@ -708,7 +718,7 @@ bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
}
/// Check whether the shift count for a machine operand is non-zero.
-inline static unsigned getTruncatedShiftCount(MachineInstr &MI,
+inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
unsigned ShiftAmtOperandIdx) {
// The shift count is six bits with the REX.W prefix and five bits without.
unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
@@ -729,8 +739,7 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, unsigned &NewSrc,
- bool &isKill, bool &isUndef,
- MachineOperand &ImplicitOp,
+ bool &isKill, MachineOperand &ImplicitOp,
LiveVariables *LV) const {
MachineFunction &MF = *MI.getParent()->getParent();
const TargetRegisterClass *RC;
@@ -747,7 +756,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
if (Opc != X86::LEA64_32r) {
NewSrc = SrcReg;
isKill = Src.isKill();
- isUndef = Src.isUndef();
+ assert(!Src.isUndef() && "Undef op doesn't need optimization");
if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
!MF.getRegInfo().constrainRegClass(NewSrc, RC))
@@ -764,7 +773,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
isKill = Src.isKill();
- isUndef = Src.isUndef();
+ assert(!Src.isUndef() && "Undef op doesn't need optimization");
} else {
// Virtual register of the wrong class, we have to create a temporary 64-bit
// vreg to feed into the LEA.
@@ -776,7 +785,6 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// Which is obviously going to be dead after we're done with it.
isKill = true;
- isUndef = false;
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *Copy);
@@ -786,88 +794,99 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
return true;
}
-/// Helper for convertToThreeAddress when 16-bit LEA is disabled, use 32-bit
-/// LEA to form 3-address code by promoting to a 32-bit superregister and then
-/// truncating back down to a 16-bit subregister.
MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
LiveVariables *LV) const {
- MachineBasicBlock::iterator MBBI = MI.getIterator();
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
- bool isDead = MI.getOperand(0).isDead();
- bool isKill = MI.getOperand(1).isKill();
-
+ // We handle 8-bit adds and various 16-bit opcodes in the switch below.
+ bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri);
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
- unsigned Opc, leaInReg;
- if (Subtarget.is64Bit()) {
- Opc = X86::LEA64_32r;
- leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
- } else {
- Opc = X86::LEA32r;
- leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
- }
+ assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
+ *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
+ "Unexpected type for LEA transform");
+
+ // TODO: For a 32-bit target, we need to adjust the LEA variables with
+ // something like this:
+ // Opcode = X86::LEA32r;
+ // InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ // OutRegLEA =
+ // Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
+ // : RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ if (!Subtarget.is64Bit())
+ return nullptr;
+
+ unsigned Opcode = X86::LEA64_32r;
+ unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+ unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
+ // we will be shifting and then extracting the lower 8/16-bits.
// This has the potential to cause partial register stall. e.g.
// movw (%rbp,%rcx,2), %dx
// leal -65(%rdx), %esi
// But testing has shown this *does* help performance in 64-bit mode (at
// least on modern x86 machines).
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ unsigned Dest = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+ bool IsDead = MI.getOperand(0).isDead();
+ bool IsKill = MI.getOperand(1).isKill();
+ unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit;
+ assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
+ BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
MachineInstr *InsMI =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
- .addReg(leaInReg, RegState::Define, X86::sub_16bit)
- .addReg(Src, getKillRegState(isKill));
+ .addReg(InRegLEA, RegState::Define, SubReg)
+ .addReg(Src, getKillRegState(IsKill));
MachineInstrBuilder MIB =
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opc), leaOutReg);
+ BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
case X86::SHL16ri: {
unsigned ShAmt = MI.getOperand(2).getImm();
MIB.addReg(0).addImm(1ULL << ShAmt)
- .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
+ .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
break;
}
case X86::INC16r:
- addRegOffset(MIB, leaInReg, true, 1);
+ addRegOffset(MIB, InRegLEA, true, 1);
break;
case X86::DEC16r:
- addRegOffset(MIB, leaInReg, true, -1);
+ addRegOffset(MIB, InRegLEA, true, -1);
break;
+ case X86::ADD8ri:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
- addRegOffset(MIB, leaInReg, true, MI.getOperand(2).getImm());
+ addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
break;
+ case X86::ADD8rr:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
unsigned Src2 = MI.getOperand(2).getReg();
- bool isKill2 = MI.getOperand(2).isKill();
- unsigned leaInReg2 = 0;
+ bool IsKill2 = MI.getOperand(2).isKill();
+ assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
+ unsigned InRegLEA2 = 0;
MachineInstr *InsMI2 = nullptr;
if (Src == Src2) {
- // ADD16rr killed %reg1028, %reg1028
+ // ADD8rr/ADD16rr killed %reg1028, %reg1028
// just a single insert_subreg.
- addRegReg(MIB, leaInReg, true, leaInReg, false);
+ addRegReg(MIB, InRegLEA, true, InRegLEA, false);
} else {
if (Subtarget.is64Bit())
- leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+ InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
else
- leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ // we will be shifting and then extracting the lower 8/16-bits.
+ BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
- .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
- .addReg(Src2, getKillRegState(isKill2));
- addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ .addReg(InRegLEA2, RegState::Define, SubReg)
+ .addReg(Src2, getKillRegState(IsKill2));
+ addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
}
- if (LV && isKill2 && InsMI2)
+ if (LV && IsKill2 && InsMI2)
LV->replaceKillInstruction(Src2, MI, *InsMI2);
break;
}
@@ -876,16 +895,16 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
MachineInstr *NewMI = MIB;
MachineInstr *ExtMI =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
+ .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
+ .addReg(OutRegLEA, RegState::Kill, SubReg);
if (LV) {
- // Update live variables
- LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
- LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
- if (isKill)
+ // Update live variables.
+ LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
+ LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
+ if (IsKill)
LV->replaceKillInstruction(Src, MI, *InsMI);
- if (isDead)
+ if (IsDead)
LV->replaceKillInstruction(Dest, MI, *ExtMI);
}
@@ -916,12 +935,18 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
const MachineOperand &Dest = MI.getOperand(0);
const MachineOperand &Src = MI.getOperand(1);
+ // Ideally, operations with undef should be folded before we get here, but we
+ // can't guarantee it. Bail out because optimizing undefs is a waste of time.
+ // Without this, we have to forward undef state to new register operands to
+ // avoid machine verifier errors.
+ if (Src.isUndef())
+ return nullptr;
+ if (MI.getNumOperands() > 2)
+ if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
+ return nullptr;
+
MachineInstr *NewMI = nullptr;
- // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
- // we have better subtarget support, enable the 16-bit LEA generation here.
- // 16-bit LEA is also slow on Core2.
- bool DisableLEA16 = true;
- bool is64Bit = Subtarget.is64Bit();
+ bool Is64Bit = Subtarget.is64Bit();
unsigned MIOpc = MI.getOpcode();
switch (MIOpc) {
@@ -951,14 +976,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
// LEA can't handle ESP.
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ SrcReg, isKill, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
@@ -966,7 +991,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addReg(SrcReg, getKillRegState(isKill))
.addImm(0)
.addReg(0);
if (ImplicitOp.getReg() != 0)
@@ -978,37 +1003,26 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SHL16ri: {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
-
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .add(Dest)
- .addReg(0)
- .addImm(1ULL << ShAmt)
- .add(Src)
- .addImm(0)
- .addReg(0);
- break;
+ if (!isTruncatedShiftCountForLEA(ShAmt))
+ return nullptr;
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
}
case X86::INC64r:
case X86::INC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- bool isKill, isUndef;
+ unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
+ (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
+ ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef));
+ .addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
@@ -1016,30 +1030,23 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::INC16r:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- NewMI = addOffset(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), 1);
- break;
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::DEC64r:
case X86::DEC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
+ ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
- .addReg(SrcReg, getUndefRegState(isUndef) |
- getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
@@ -1048,13 +1055,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::DEC16r:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
- NewMI = addOffset(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), -1);
- break;
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::ADD64rr:
case X86::ADD64rr_DB:
case X86::ADD32rr:
@@ -1064,21 +1065,21 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
Opc = X86::LEA64r;
else
- Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ SrcReg, isKill, ImplicitOp, LV))
return nullptr;
const MachineOperand &Src2 = MI.getOperand(2);
- bool isKill2, isUndef2;
+ bool isKill2;
unsigned SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
- SrcReg2, isKill2, isUndef2, ImplicitOp2, LV))
+ SrcReg2, isKill2, ImplicitOp2, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
@@ -1088,36 +1089,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MIB.add(ImplicitOp2);
NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
-
- // Preserve undefness of the operands.
- NewMI->getOperand(1).setIsUndef(isUndef);
- NewMI->getOperand(3).setIsUndef(isUndef2);
-
if (LV && Src2.isKill())
LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
break;
}
+ case X86::ADD8rr:
case X86::ADD16rr:
- case X86::ADD16rr_DB: {
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Src2 = MI.getOperand(2).getReg();
- bool isKill2 = MI.getOperand(2).isKill();
- NewMI = addRegReg(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest),
- Src.getReg(), Src.isKill(), Src2, isKill2);
-
- // Preserve undefness of the operands.
- bool isUndef = MI.getOperand(1).isUndef();
- bool isUndef2 = MI.getOperand(2).isUndef();
- NewMI->getOperand(1).setIsUndef(isUndef);
- NewMI->getOperand(3).setIsUndef(isUndef2);
-
- if (LV && isKill2)
- LV->replaceKillInstruction(Src2, MI, *NewMI);
- break;
- }
+ case X86::ADD16rr_DB:
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64ri32_DB:
@@ -1132,38 +1111,30 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD32ri_DB:
case X86::ADD32ri8_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ SrcReg, isKill, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
- .addReg(SrcReg, getUndefRegState(isUndef) |
- getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
NewMI = addOffset(MIB, MI.getOperand(2));
break;
}
+ case X86::ADD8ri:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addOffset(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src),
- MI.getOperand(2));
- break;
-
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::VMOVDQU8Z128rmk:
case X86::VMOVDQU8Z256rmk:
case X86::VMOVDQU8Zrmk:
@@ -2540,7 +2511,7 @@ void X86InstrInfo::replaceBranchWithTailCall(
// call. This way they still appear live across the call.
LivePhysRegs LiveRegs(getRegisterInfo());
LiveRegs.addLiveOuts(MBB);
- SmallVector<std::pair<unsigned, const MachineOperand *>, 8> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;
LiveRegs.stepForward(*MIB, Clobbers);
for (const auto &C : Clobbers) {
MIB.addReg(C.first, RegState::Implicit);
@@ -2630,6 +2601,11 @@ bool X86InstrInfo::AnalyzeBranchImpl(
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
+ // In practice we should never have an undef EFLAGS operand; if we do,
+ // abort here as we are not prepared to preserve the flag.
+ if (I->getOperand(1).isUndef())
+ return true;
+
// Working from the bottom, handle the first conditional branch.
if (Cond.empty()) {
MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
@@ -3112,9 +3088,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
report_fatal_error("Cannot emit physreg copy instruction");
}
-bool X86InstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
if (MI.isMoveReg()) {
Dest = &MI.getOperand(0);
Src = &MI.getOperand(1);
@@ -3242,9 +3218,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
}
}
-bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool X86InstrInfo::getMemOperandWithOffset(
+ MachineInstr &MemOp, MachineOperand *&BaseOp, int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
const MCInstrDesc &Desc = MemOp.getDesc();
int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
if (MemRefBegin < 0)
@@ -3252,11 +3228,10 @@ bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
MemRefBegin += X86II::getOperandBias(Desc);
- MachineOperand &BaseMO = MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
- if (!BaseMO.isReg()) // Can be an MO_FrameIndex
+ BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
+ if (!BaseOp->isReg()) // Can be an MO_FrameIndex
return false;
- BaseReg = BaseMO.getReg();
if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
return false;
@@ -3272,6 +3247,8 @@ bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
Offset = DispMO.getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
@@ -3303,29 +3280,25 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
(Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
- DebugLoc DL = MBB.findDebugLoc(MI);
- addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
.addReg(SrcReg, getKillRegState(isKill));
}
-void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
+void X86InstrInfo::storeRegToAddr(
+ MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
+ ArrayRef<MachineMemOperand *> MMOs,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = MMOBegin != MMOEnd &&
- (*MMOBegin)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.add(Addr[i]);
MIB.addReg(SrcReg, getKillRegState(isKill));
- (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
NewMIs.push_back(MIB);
}
@@ -3341,26 +3314,23 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
(Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
- DebugLoc DL = MBB.findDebugLoc(MI);
- addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx);
}
-void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
+void X86InstrInfo::loadRegFromAddr(
+ MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
+ ArrayRef<MachineMemOperand *> MMOs,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = MMOBegin != MMOEnd &&
- (*MMOBegin)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.add(Addr[i]);
- (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
NewMIs.push_back(MIB);
}
@@ -3451,9 +3421,10 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
/// This function can be extended later on.
/// SrcReg, SrcRegs: register operands for FlagI.
/// ImmValue: immediate for FlagI if it takes an immediate.
-inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
- unsigned SrcReg2, int ImmMask,
- int ImmValue, MachineInstr &OI) {
+inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
+ unsigned SrcReg, unsigned SrcReg2,
+ int ImmMask, int ImmValue,
+ const MachineInstr &OI) {
if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
(FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
(FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
@@ -3484,7 +3455,9 @@ inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
/// Check whether the definition can be converted
/// to remove a comparison against zero.
-inline static bool isDefConvertible(MachineInstr &MI) {
+inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
+ NoSignFlag = false;
+
switch (MI.getOpcode()) {
default: return false;
@@ -3549,8 +3522,6 @@ inline static bool isDefConvertible(MachineInstr &MI) {
case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
case X86::ANDN32rr: case X86::ANDN32rm:
case X86::ANDN64rr: case X86::ANDN64rm:
- case X86::BEXTR32rr: case X86::BEXTR64rr:
- case X86::BEXTR32rm: case X86::BEXTR64rm:
case X86::BLSI32rr: case X86::BLSI32rm:
case X86::BLSI64rr: case X86::BLSI64rm:
case X86::BLSMSK32rr:case X86::BLSMSK32rm:
@@ -3568,8 +3539,6 @@ inline static bool isDefConvertible(MachineInstr &MI) {
case X86::TZCNT16rr: case X86::TZCNT16rm:
case X86::TZCNT32rr: case X86::TZCNT32rm:
case X86::TZCNT64rr: case X86::TZCNT64rm:
- case X86::BEXTRI32ri: case X86::BEXTRI32mi:
- case X86::BEXTRI64ri: case X86::BEXTRI64mi:
case X86::BLCFILL32rr: case X86::BLCFILL32rm:
case X86::BLCFILL64rr: case X86::BLCFILL64rm:
case X86::BLCI32rr: case X86::BLCI32rm:
@@ -3584,12 +3553,23 @@ inline static bool isDefConvertible(MachineInstr &MI) {
case X86::BLSFILL64rr: case X86::BLSFILL64rm:
case X86::BLSIC32rr: case X86::BLSIC32rm:
case X86::BLSIC64rr: case X86::BLSIC64rm:
+ case X86::T1MSKC32rr: case X86::T1MSKC32rm:
+ case X86::T1MSKC64rr: case X86::T1MSKC64rm:
+ case X86::TZMSK32rr: case X86::TZMSK32rm:
+ case X86::TZMSK64rr: case X86::TZMSK64rm:
+ return true;
+ case X86::BEXTR32rr: case X86::BEXTR64rr:
+ case X86::BEXTR32rm: case X86::BEXTR64rm:
+ case X86::BEXTRI32ri: case X86::BEXTRI32mi:
+ case X86::BEXTRI64ri: case X86::BEXTRI64mi:
+ // BEXTR doesn't update the sign flag, so we can't use it.
+ NoSignFlag = true;
return true;
}
}
/// Check whether the use can be converted to remove a comparison against zero.
-static X86::CondCode isUseDefConvertible(MachineInstr &MI) {
+static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default: return X86::COND_INVALID;
case X86::LZCNT16rr: case X86::LZCNT16rm:
@@ -3604,12 +3584,12 @@ static X86::CondCode isUseDefConvertible(MachineInstr &MI) {
case X86::TZCNT32rr: case X86::TZCNT32rm:
case X86::TZCNT64rr: case X86::TZCNT64rm:
return X86::COND_B;
- case X86::BSF16rr:
- case X86::BSF16rm:
- case X86::BSF32rr:
- case X86::BSF32rm:
- case X86::BSF64rr:
- case X86::BSF64rm:
+ case X86::BSF16rr: case X86::BSF16rm:
+ case X86::BSF32rr: case X86::BSF32rm:
+ case X86::BSF64rr: case X86::BSF64rm:
+ case X86::BSR16rr: case X86::BSR16rm:
+ case X86::BSR32rr: case X86::BSR32rm:
+ case X86::BSR64rr: case X86::BSR64rm:
return X86::COND_E;
}
}
@@ -3687,8 +3667,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// instruction we can eliminate the compare iff the use sets EFLAGS in the
// right way.
bool ShouldUpdateCC = false;
+ bool NoSignFlag = false;
X86::CondCode NewCC = X86::COND_INVALID;
- if (IsCmpZero && !isDefConvertible(*MI)) {
+ if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) {
// Scan forward from the use until we hit the use we're looking for or the
// compare instruction.
for (MachineBasicBlock::iterator J = MI;; ++J) {
@@ -3807,6 +3788,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
case X86::COND_O: case X86::COND_NO:
// CF and OF are used, we can't perform this optimization.
return false;
+ case X86::COND_S: case X86::COND_NS:
+ // If SF is used, but the instruction doesn't update the SF, then we
+ // can't do the optimization.
+ if (NoSignFlag)
+ return false;
+ break;
}
// If we're updating the condition code check if we have to reverse the
@@ -4267,9 +4254,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::VMOVUPSZ256mr_NOVLX:
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
- case X86::MOV32ri64:
+ case X86::MOV32ri64: {
+ unsigned Reg = MIB->getOperand(0).getReg();
+ unsigned Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
MI.setDesc(get(X86::MOV32ri));
+ MIB->getOperand(0).setReg(Reg32);
+ MIB.addReg(Reg, RegState::ImplicitDefine);
return true;
+ }
// KNL does not recognize dependency-breaking idioms for mask registers,
// so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
@@ -5353,6 +5345,54 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
/*Size=*/0, Alignment, /*AllowCommute=*/true);
}
+static SmallVector<MachineMemOperand *, 2>
+extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
+ SmallVector<MachineMemOperand *, 2> LoadMMOs;
+
+ for (MachineMemOperand *MMO : MMOs) {
+ if (!MMO->isLoad())
+ continue;
+
+ if (!MMO->isStore()) {
+ // Reuse the MMO.
+ LoadMMOs.push_back(MMO);
+ } else {
+ // Clone the MMO and unset the store flag.
+ LoadMMOs.push_back(MF.getMachineMemOperand(
+ MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOStore,
+ MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
+ MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering()));
+ }
+ }
+
+ return LoadMMOs;
+}
+
+static SmallVector<MachineMemOperand *, 2>
+extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
+ SmallVector<MachineMemOperand *, 2> StoreMMOs;
+
+ for (MachineMemOperand *MMO : MMOs) {
+ if (!MMO->isStore())
+ continue;
+
+ if (!MMO->isLoad()) {
+ // Reuse the MMO.
+ StoreMMOs.push_back(MMO);
+ } else {
+ // Clone the MMO and unset the load flag.
+ StoreMMOs.push_back(MF.getMachineMemOperand(
+ MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOLoad,
+ MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
+ MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering()));
+ }
+ }
+
+ return StoreMMOs;
+}
+
bool X86InstrInfo::unfoldMemoryOperand(
MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
@@ -5397,9 +5437,8 @@ bool X86InstrInfo::unfoldMemoryOperand(
// Emit the load instruction.
if (UnfoldLoad) {
- std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs =
- MF.extractLoadMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
+ auto MMOs = extractLoadMMOs(MI.memoperands(), MF);
+ loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs, NewMIs);
if (UnfoldStore) {
// Address operands cannot be marked isKill.
for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
@@ -5464,9 +5503,8 @@ bool X86InstrInfo::unfoldMemoryOperand(
// Emit the store instruction.
if (UnfoldStore) {
const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
- std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs =
- MF.extractStoreMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
+ auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs, NewMIs);
}
return true;
@@ -5511,26 +5549,21 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
SDNode *Load = nullptr;
if (FoldedLoad) {
EVT VT = *TRI.legalclasstypes_begin(*RC);
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
- if (!(*MMOs.first) &&
- RC == &X86::VR128RegClass &&
+ auto MMOs = extractLoadMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
+ if (MMOs.empty() && RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned load.
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = (*MMOs.first) &&
- (*MMOs.first)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
VT, MVT::Other, AddrOps);
NewNodes.push_back(Load);
// Preserve memory reference information.
- cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Load), MMOs);
}
// Emit the data processing instruction.
@@ -5580,27 +5613,22 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
AddrOps.pop_back();
AddrOps.push_back(SDValue(NewNode, 0));
AddrOps.push_back(Chain);
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
- if (!(*MMOs.first) &&
- RC == &X86::VR128RegClass &&
+ auto MMOs = extractStoreMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
+ if (MMOs.empty() && RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned store.
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = (*MMOs.first) &&
- (*MMOs.first)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
SDNode *Store =
DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
dl, MVT::Other, AddrOps);
NewNodes.push_back(Store);
// Preserve memory reference information.
- cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Store), MMOs);
}
return true;
@@ -6511,6 +6539,19 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
// All domains are valid.
return 0xe;
+ case X86::MOVHLPSrr:
+ // We can swap domains when both inputs are the same register.
+ // FIXME: This doesn't catch all the cases we would like. If the input
+ // register isn't KILLed by the instruction, the two-address instruction
+ // pass puts a COPY on one input. The other input uses the original
+ // register. This prevents the same physical register from being used by
+ // both inputs.
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
+ MI.getOperand(0).getSubReg() == 0 &&
+ MI.getOperand(1).getSubReg() == 0 &&
+ MI.getOperand(2).getSubReg() == 0)
+ return 0x6;
+ return 0;
}
return 0;
}
@@ -6617,6 +6658,20 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
MI.setDesc(get(table[Domain - 1]));
return true;
}
+ case X86::UNPCKHPDrr:
+ case X86::MOVHLPSrr:
+ // We just need to commute the instruction, which will switch the domains.
+ if (Domain != dom && Domain != 3 &&
+ MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
+ MI.getOperand(0).getSubReg() == 0 &&
+ MI.getOperand(1).getSubReg() == 0 &&
+ MI.getOperand(2).getSubReg() == 0) {
+ commuteInstruction(MI, false);
+ return true;
+ }
+ // We must always return true for MOVHLPSrr.
+ if (Opcode == X86::MOVHLPSrr)
+ return true;
}
return false;
}
@@ -7339,7 +7394,8 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
{MO_TLVP, "x86-tlvp"},
{MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
- {MO_SECREL, "x86-secrel"}};
+ {MO_SECREL, "x86-secrel"},
+ {MO_COFFSTUB, "x86-coffstub"}};
return makeArrayRef(TargetFlags);
}
@@ -7396,12 +7452,28 @@ namespace {
.addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
.addReg(0);
} else if (TM->getCodeModel() == CodeModel::Large) {
- // Loading the GOT in the large code model requires math with labels,
- // so we use a pseudo instruction and expand it during MC emission.
- unsigned Scratch = RegInfo.createVirtualRegister(&X86::GR64RegClass);
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVGOT64r), PC)
- .addReg(Scratch, RegState::Undef | RegState::Define)
- .addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+ // In the large code model, we are aiming for this code, though the
+ // register allocation may vary:
+ // leaq .LN$pb(%rip), %rax
+ // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
+ // addq %rcx, %rax
+ // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
+ unsigned PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ unsigned GOTReg =
+ RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addSym(MF.getPICBaseSymbol())
+ .addReg(0);
+ std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
+ .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_PIC_BASE_OFFSET);
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
+ .addReg(PBReg, RegState::Kill)
+ .addReg(GOTReg, RegState::Kill);
} else {
llvm_unreachable("unexpected code model");
}
@@ -7736,3 +7808,6 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
return It;
}
+
+#define GET_INSTRINFO_HELPERS
+#include "X86GenInstrInfo.inc"
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index b1ceb767cce4..159cb50afc5c 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -117,6 +117,7 @@ inline static bool isGlobalStubReference(unsigned char TargetFlag) {
case X86II::MO_GOT: // normal GOT reference.
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
+ case X86II::MO_COFFSTUB: // COFF .refptr stub.
return true;
default:
return false;
@@ -257,7 +258,7 @@ public:
/// operand to the LEA instruction.
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc,
- bool &isKill, bool &isUndef, MachineOperand &ImplicitOp,
+ bool &isKill, MachineOperand &ImplicitOp,
LiveVariables *LV) const;
/// convertToThreeAddress - This method must be implemented by targets that
@@ -326,9 +327,9 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
bool analyzeBranchPredicate(MachineBasicBlock &MBB,
TargetInstrInfo::MachineBranchPredicate &MBP,
bool AllowModify = false) const override;
@@ -348,8 +349,6 @@ public:
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, unsigned SrcReg,
bool isKill, int FrameIndex,
@@ -359,8 +358,7 @@ public:
void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
+ ArrayRef<MachineMemOperand *> MMOs,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -371,8 +369,7 @@ public:
void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
+ ArrayRef<MachineMemOperand *> MMOs,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
bool expandPostRAPseudo(MachineInstr &MI) const override;
@@ -561,6 +558,9 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
+#define GET_INSTRINFO_HELPER_DECLS
+#include "X86GenInstrInfo.inc"
+
protected:
/// Commutes the operands in the given instruction by changing the operands
/// order and/or changing the instruction's opcode and/or the immediate value
@@ -577,7 +577,16 @@ protected:
unsigned CommuteOpIdx1,
unsigned CommuteOpIdx2) const override;
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another register, return true along with
+ /// the @Source machine operand and @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
private:
+ /// This is a helper for convertToThreeAddress for 8- and 16-bit instructions.
+ /// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
+ /// super-register and then truncating back down to an 8/16-bit sub-register.
MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
MachineInstr &MI,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index bc7afd32d494..e53f83baa3c6 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -17,10 +17,6 @@
// X86 specific DAG Nodes.
//
-def SDTIntShiftDOp: SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisInt<3>]>;
-
def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
@@ -257,8 +253,6 @@ def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
-def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
-def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
@@ -282,15 +276,10 @@ def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
-def X86lock_inc : SDNode<"X86ISD::LINC", SDTLockUnaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-def X86lock_dec : SDNode<"X86ISD::LDEC", SDTLockUnaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-
def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
+def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>;
+
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
@@ -955,8 +944,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
-def UseRetpoline : Predicate<"Subtarget->useRetpoline()">;
-def NotUseRetpoline : Predicate<"!Subtarget->useRetpoline()">;
+def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
+def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -1210,12 +1199,12 @@ def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>,
OpSize32, Requires<[Not64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayLoad, SchedRW
-let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in {
+let mayStore = 1, mayLoad = 1, SchedRW = [WriteCopy] in {
def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", []>,
OpSize16;
def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", []>,
OpSize32, Requires<[Not64BitMode]>;
-} // mayStore, mayLoad, WriteRMW
+} // mayStore, mayLoad, SchedRW
let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
@@ -1243,7 +1232,7 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
Requires<[Not64BitMode]>;
} // mayStore, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src", []>,
OpSize16;
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
@@ -1302,7 +1291,7 @@ def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>,
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayLoad, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in
def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", []>,
OpSize32, Requires<[In64BitMode]>;
let mayStore = 1, SchedRW = [WriteStore] in {
@@ -1314,7 +1303,7 @@ def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>,
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayStore, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>,
OpSize32, Requires<[In64BitMode]>;
} // mayLoad, mayStore, SchedRW
@@ -1491,7 +1480,7 @@ def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, imm:$src)]>;
@@ -1505,7 +1494,7 @@ def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, i64immSExt32:$src)]>;
}
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isMoveImm = 1 in {
def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
"movabs{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, relocImm:$src)]>;
@@ -1771,7 +1760,7 @@ def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
// only for now. These instructions are also slow on modern CPUs so that's
// another reason to avoid generating them.
-let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteBitTestRegLd] in {
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[]>, OpSize16, TB, NotMemoryFoldable;
@@ -1799,7 +1788,7 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
// Note that these instructions aren't slow because that only applies when the
// other operand is in a register. When it's an immediate, bt is still fast.
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTestImmLd] in {
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi16 addr:$src1),
@@ -1818,7 +1807,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
} // SchedRW
let hasSideEffects = 0 in {
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1830,7 +1819,7 @@ def BTC64rr : RI<0xBB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2
NotMemoryFoldable;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1842,7 +1831,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1851,7 +1840,7 @@ def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$sr
"btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
@@ -1861,7 +1850,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1873,7 +1862,7 @@ def BTR64rr : RI<0xB3, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2
NotMemoryFoldable;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1885,7 +1874,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
@@ -1896,7 +1885,7 @@ def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$sr
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
@@ -1908,7 +1897,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1920,7 +1909,7 @@ def BTS64rr : RI<0xAB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2
NotMemoryFoldable;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1932,7 +1921,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1941,7 +1930,7 @@ def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$sr
"bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
@@ -1994,7 +1983,7 @@ multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag>
defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap">, NotMemoryFoldable;
// Swap between registers.
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteXCHG] in {
let Constraints = "$src1 = $dst1, $src2 = $dst2", hasSideEffects = 0 in {
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst1, GR8:$dst2),
(ins GR8:$src1, GR8:$src2),
@@ -2027,7 +2016,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
} // SchedRW
let hasSideEffects = 0, Constraints = "$src1 = $dst1, $src2 = $dst2",
- Defs = [EFLAGS], SchedRW = [WriteALU] in {
+ Defs = [EFLAGS], SchedRW = [WriteXCHG] in {
def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst1, GR8:$dst2),
(ins GR8:$src1, GR8:$src2),
"xadd{b}\t{$src2, $src1|$src1, $src2}", []>, TB;
@@ -2061,7 +2050,7 @@ def XADD64rm : RI<0xC1, MRMSrcMem, (outs GR64:$dst),
}
-let SchedRW = [WriteALU], hasSideEffects = 0 in {
+let SchedRW = [WriteCMPXCHG], hasSideEffects = 0 in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB,
@@ -2080,7 +2069,7 @@ def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
NotMemoryFoldable;
} // SchedRW, hasSideEffects
-let SchedRW = [WriteALULd, WriteRMW], mayLoad = 1, mayStore = 1,
+let SchedRW = [WriteCMPXCHGRMW], mayLoad = 1, mayStore = 1,
hasSideEffects = 0 in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -2368,11 +2357,11 @@ multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
let hasSideEffects = 0 in {
def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[WriteALU]>;
+ T8PS, VEX_4V, Sched<[WriteBLS]>;
let mayLoad = 1 in
def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[WriteALULd]>;
+ T8PS, VEX_4V, Sched<[WriteBLS.Folded]>;
}
}
@@ -2389,6 +2378,16 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
// Pattern fragments to auto generate BMI instructions.
//===----------------------------------------------------------------------===//
+def or_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86or_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86xor_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
let Predicates = [HasBMI] in {
// FIXME: patterns for the load versions are not implemented
def : Pat<(and GR32:$src, (add GR32:$src, -1)),
@@ -2405,6 +2404,14 @@ let Predicates = [HasBMI] in {
(BLSI32rr GR32:$src)>;
def : Pat<(and GR64:$src, (ineg GR64:$src)),
(BLSI64rr GR64:$src)>;
+
+ // Versions to match flag producing ops.
+ // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
+ // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+ def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ (BLSMSK32rr GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ (BLSMSK64rr GR64:$src)>;
}
multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
@@ -2423,7 +2430,7 @@ multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ Sched.ReadAfterFold]>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -2449,14 +2456,14 @@ multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ Sched.ReadAfterFold]>;
}
let Predicates = [HasBMI2], Defs = [EFLAGS] in {
defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
- int_x86_bmi_bzhi_32, loadi32, WriteBZHI>;
+ X86bzhi, loadi32, WriteBZHI>;
defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
- int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W;
+ X86bzhi, loadi64, WriteBZHI>, VEX_W;
}
def CountTrailingOnes : SDNodeXForm<imm, [{
@@ -2497,84 +2504,6 @@ let Predicates = [HasBMI2, NoTBM] in {
(MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
}
-let Predicates = [HasBMI2] in {
- multiclass _bmi_bzhi_pattern<dag regpattern, dag mempattern, RegisterClass RC,
- ValueType VT, Instruction DstInst,
- Instruction DstMemInst> {
- def : Pat<regpattern,
- (DstInst RC:$src,
- (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
- def : Pat<mempattern,
- (DstMemInst addr:$src,
- (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
- }
-
- multiclass bmi_bzhi_patterns<RegisterClass RC, int bitwidth, ValueType VT,
- Instruction DstInst, X86MemOperand x86memop,
- Instruction DstMemInst> {
- // x & ((1 << y) - 1)
- defm : _bmi_bzhi_pattern<(and RC:$src, (add (shl 1, GR8:$lz), -1)),
- (and (x86memop addr:$src),
- (add (shl 1, GR8:$lz), -1)),
- RC, VT, DstInst, DstMemInst>;
-
- // x & ~(-1 << y)
- defm : _bmi_bzhi_pattern<(and RC:$src, (xor (shl -1, GR8:$lz), -1)),
- (and (x86memop addr:$src),
- (xor (shl -1, GR8:$lz), -1)),
- RC, VT, DstInst, DstMemInst>;
-
- // x & (-1 >> (bitwidth - y))
- defm : _bmi_bzhi_pattern<(and RC:$src, (srl -1, (sub bitwidth, GR8:$lz))),
- (and (x86memop addr:$src),
- (srl -1, (sub bitwidth, GR8:$lz))),
- RC, VT, DstInst, DstMemInst>;
-
- // x << (bitwidth - y) >> (bitwidth - y)
- defm : _bmi_bzhi_pattern<(srl (shl RC:$src, (sub bitwidth, GR8:$lz)),
- (sub bitwidth, GR8:$lz)),
- (srl (shl (x86memop addr:$src),
- (sub bitwidth, GR8:$lz)),
- (sub bitwidth, GR8:$lz)),
- RC, VT, DstInst, DstMemInst>;
- }
-
- defm : bmi_bzhi_patterns<GR32, 32, i32, BZHI32rr, loadi32, BZHI32rm>;
- defm : bmi_bzhi_patterns<GR64, 64, i64, BZHI64rr, loadi64, BZHI64rm>;
-
- // x & (-1 >> (32 - y))
- def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
- (BZHI32rr GR32:$src, GR32:$lz)>;
- def : Pat<(and (loadi32 addr:$src), (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
- (BZHI32rm addr:$src, GR32:$lz)>;
-
- // x & (-1 >> (64 - y))
- def : Pat<(and GR64:$src, (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
- (BZHI64rr GR64:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
- def : Pat<(and (loadi64 addr:$src), (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
- (BZHI64rm addr:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
-
- // x << (32 - y) >> (32 - y)
- def : Pat<(srl (shl GR32:$src, (i8 (trunc (sub 32, GR32:$lz)))),
- (i8 (trunc (sub 32, GR32:$lz)))),
- (BZHI32rr GR32:$src, GR32:$lz)>;
- def : Pat<(srl (shl (loadi32 addr:$src), (i8 (trunc (sub 32, GR32:$lz)))),
- (i8 (trunc (sub 32, GR32:$lz)))),
- (BZHI32rm addr:$src, GR32:$lz)>;
-
- // x << (64 - y) >> (64 - y)
- def : Pat<(srl (shl GR64:$src, (i8 (trunc (sub 64, GR32:$lz)))),
- (i8 (trunc (sub 64, GR32:$lz)))),
- (BZHI64rr GR64:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
- def : Pat<(srl (shl (loadi64 addr:$src), (i8 (trunc (sub 64, GR32:$lz)))),
- (i8 (trunc (sub 64, GR32:$lz)))),
- (BZHI64rm addr:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
-} // HasBMI2
-
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int,
PatFrag ld_frag> {
@@ -2585,7 +2514,7 @@ multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>,
- VEX_4V, Sched<[WriteALULd, ReadAfterLd]>;
+ VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}
let Predicates = [HasBMI2] in {
@@ -2881,6 +2810,45 @@ let Predicates = [HasTBM] in {
(TZMSK32rr GR32:$src)>;
def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
(TZMSK64rr GR64:$src)>;
+
+ // Patterns to match flag producing ops.
+ // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
+ // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+ def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
+ (BLCI64rr GR64:$src)>;
+
+ // Extra patterns because opt can optimize the above patterns to this.
+ def : Pat<(or_flag_nocf GR32:$src, (sub -2, GR32:$src)),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)),
+ (BLCI64rr GR64:$src)>;
+
+ def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCMSK32rr GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCMSK64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCS32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCS64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ (BLSFILL32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ (BLSFILL64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
+ (BLSIC32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
+ (BLSIC64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
+ (T1MSKC32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
+ (T1MSKC64rr GR64:$src)>;
} // HasTBM
//===----------------------------------------------------------------------===//
@@ -2976,6 +2944,8 @@ def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popf", "popfq", "intel">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popfd", "popfl", "att">;
+def : MnemonicAlias<"popfw", "popf", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popfw", "popf", "intel">, Requires<[In64BitMode]>;
// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
// all modes. However: "push (addr)" and "push $42" should default to
@@ -2988,6 +2958,8 @@ def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>;
def : MnemonicAlias<"pushf", "pushfq", "intel">, Requires<[In64BitMode]>;
def : MnemonicAlias<"pushfd", "pushfl", "att">;
+def : MnemonicAlias<"pushfw", "pushf", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushfw", "pushf", "intel">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popad", "popal", "intel">, Requires<[Not64BitMode]>;
def : MnemonicAlias<"pushad", "pushal", "intel">, Requires<[Not64BitMode]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
index aefeffedfc1a..8f3357170576 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -47,7 +47,7 @@ let Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
@@ -64,7 +64,7 @@ let Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -105,7 +105,7 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
[(set VR64:$dst,
(IntId64 VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -122,7 +122,7 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -553,7 +553,7 @@ let Predicates = [HasMMX, HasSSE1] in {
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
imm:$src3))]>,
- Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 6a9b20998210..e2bcd18ce660 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -35,7 +35,7 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -57,7 +57,7 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -81,7 +81,7 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
@@ -103,7 +103,7 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rm, d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -265,8 +265,6 @@ let Predicates = [UseAVX] in {
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
@@ -349,8 +347,6 @@ let Predicates = [UseSSE2] in {
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
}
@@ -593,8 +589,21 @@ let Predicates = [HasAVX, NoVLX] in {
// available and changing the domain is beneficial.
def : Pat<(alignedloadv4i64 addr:$src),
(VMOVAPSYrm addr:$src)>;
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(alignedloadv16i16 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(alignedloadv32i8 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
def : Pat<(loadv4i64 addr:$src),
(VMOVUPSYrm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(loadv16i16 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(loadv32i8 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+
def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
@@ -619,8 +628,20 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [UseSSE1] in {
def : Pat<(alignedloadv2i64 addr:$src),
(MOVAPSrm addr:$src)>;
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(alignedloadv8i16 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(alignedloadv16i8 addr:$src),
+ (MOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),
(MOVUPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(loadv8i16 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(loadv16i8 addr:$src),
+ (MOVUPSrm addr:$src)>;
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
@@ -652,7 +673,7 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode pdnode,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
[], SSEPackedSingle>, PS,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
@@ -660,7 +681,7 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode pdnode,
[(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
SSEPackedDouble>, PD,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
@@ -820,19 +841,6 @@ let Constraints = "$src1 = $dst" in {
Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}
-// TODO: This is largely to trick fastisel into ignoring the pattern.
-def UnpckhUnary : PatFrag<(ops node:$src1, node:$src2),
- (X86Unpckh node:$src1, node:$src2), [{
- return N->getOperand(0) == N->getOperand(1);
-}]>;
-
-let Predicates = [UseSSE2] in {
- // TODO: This is a hack pattern to allow lowering to emit unpckh instead of
- // movhlps for sse2 without changing a bunch of tests.
- def : Pat<(v2f64 (UnpckhUnary VR128:$src, VR128:$src)),
- (MOVHLPSrr VR128:$src, VR128:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//
@@ -858,7 +866,7 @@ let hasSideEffects = 0 in {
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
[(set RC:$dst, (DstTy (sint_to_fp
- (SrcTy (bitconvert (ld_frag addr:$src))))))], d>,
+ (SrcTy (ld_frag addr:$src)))))], d>,
Sched<[sched.Folded]>;
}
}
@@ -874,7 +882,7 @@ let hasSideEffects = 0, Predicates = [UseAVX] in {
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
}
@@ -1001,18 +1009,17 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
-// FIXME: We probably want to match the rm form only when optimizing for
-// size, to avoid false depenendecies (see sse_fp_unop_s for details)
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
- string asm, X86FoldableSchedWrite sched> {
+ ValueType DstVT, ValueType SrcVT, SDNode OpNode,
+ Operand memop, ComplexPattern mem_cpat, string asm,
+ X86FoldableSchedWrite sched> {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))]>,
+ [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
Sched<[sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int mem_cpat:$src))]>,
+ [(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
Sched<[sched.Folded]>;
}
@@ -1032,21 +1039,21 @@ let hasSideEffects = 0 in {
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let Predicates = [UseAVX] in {
-defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
- int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
+defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
+ X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
WriteCvtSD2I>, XD, VEX, VEX_LIG;
-defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
+defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
+ X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG;
}
-defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD;
-defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;
@@ -1078,60 +1085,60 @@ let isCodeGenOnly = 1 in {
// Aliases for intrinsics
let isCodeGenOnly = 1 in {
let Predicates = [UseAVX] in {
-defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
WriteCvtSS2I>, XS, VEX;
-defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
+ X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I>,
XS, VEX, VEX_W;
-defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
WriteCvtSS2I>, XD, VEX;
-defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
+ X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSS2I>,
XD, VEX, VEX_W;
}
-defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
WriteCvtSS2I>, XS;
-defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
+ X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I>, XS, REX_W;
-defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
WriteCvtSD2I>, XD;
-defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
+ X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSD2I>, XD, REX_W;
} // isCodeGenOnly = 1
let Predicates = [UseAVX] in {
-defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, VEX, VEX_LIG;
-defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG;
}
-defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS;
-defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, REX_W;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64,
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, loadv4i64,
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PSY>,
PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64,
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
PS, Requires<[UseSSE2]>;
@@ -1186,7 +1193,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XD, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
def : Pat<(f32 (fpround FR64:$src)),
@@ -1217,7 +1224,7 @@ def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1231,7 +1238,7 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, Requires<[UseSSE2]>,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
} // isCodeGenOnly = 1
@@ -1248,7 +1255,7 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>,
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
Requires<[UseAVX, OptForSize]>;
}
@@ -1295,7 +1302,7 @@ def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1307,7 +1314,7 @@ def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[]>, XS, Requires<[UseSSE2]>,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
}
} // isCodeGenOnly = 1
@@ -1690,7 +1697,7 @@ let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1700,7 +1707,7 @@ def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ (v4f64 (sint_to_fp (loadv4i32 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
VEX_WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
@@ -1714,7 +1721,7 @@ let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
Sched<[WriteCvtI2PDLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1826,7 +1833,7 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
@@ -1836,7 +1843,7 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, []>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
}
}
@@ -1878,7 +1885,7 @@ let mayLoad = 1 in
(ins VR128:$src1, memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
mem_cpat:$src, imm:$cc))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let isCodeGenOnly = 1 in {
@@ -1920,7 +1927,7 @@ let mayLoad = 1 in
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -1938,7 +1945,7 @@ let mayLoad = 1 in
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
mem_cpat:$src2))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Defs = [EFLAGS] in {
@@ -2003,7 +2010,7 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst,
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
@@ -2013,7 +2020,7 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
let mayLoad = 1 in
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
- asm_alt, [], d>, Sched<[sched.Folded, ReadAfterLd]>,
+ asm_alt, [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
@@ -2109,7 +2116,7 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
@@ -2165,58 +2172,58 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX] in {
-defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
+defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
-defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
+defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG;
-defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
+defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
-defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
+defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
-defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
+defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
-defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
+defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
-defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
+defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
-defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
+defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
}// Predicates = [HasAVX, NoVLX]
let Constraints = "$src1 = $dst" in {
- defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
- defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
- defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
- defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX1Only] in {
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
@@ -2253,6 +2260,16 @@ let Predicates = [HasAVX] in {
SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG;
defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG;
+
+  // Also support integer VTs to avoid an int->fp bitcast in the DAG.
+ def : Pat<(X86movmsk (v4i32 VR128:$src)),
+ (VMOVMSKPSrr VR128:$src)>;
+ def : Pat<(X86movmsk (v2i64 VR128:$src)),
+ (VMOVMSKPDrr VR128:$src)>;
+ def : Pat<(X86movmsk (v8i32 VR256:$src)),
+ (VMOVMSKPSYrr VR256:$src)>;
+ def : Pat<(X86movmsk (v4i64 VR256:$src)),
+ (VMOVMSKPDYrr VR256:$src)>;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
@@ -2260,6 +2277,14 @@ defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
SSEPackedDouble>, PD;
+let Predicates = [UseSSE2] in {
+  // Also support integer VTs to avoid an int->fp bitcast in the DAG.
+ def : Pat<(X86movmsk (v4i32 VR128:$src)),
+ (MOVMSKPSrr VR128:$src)>;
+ def : Pat<(X86movmsk (v2i64 VR128:$src)),
+ (MOVMSKPDrr VR128:$src)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
@@ -2284,9 +2309,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt
@@ -2296,16 +2320,16 @@ multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
Predicate prd> {
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
- VR128, loadv2i64, i128mem, sched.XMM,
+ VR128, load, i128mem, sched.XMM,
IsCommutable, 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
- memopv2i64, i128mem, sched.XMM, IsCommutable, 1>;
+ memop, i128mem, sched.XMM, IsCommutable, 1>;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
- OpVT256, VR256, loadv4i64, i256mem, sched.YMM,
+ OpVT256, VR256, load, i256mem, sched.YMM,
IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
}
@@ -2365,24 +2389,136 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;
+let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
+ (VPORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
+ (VPORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
+ (VPORYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPANDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPANDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPANDYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPORYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPXORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPXORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPXORYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPANDNYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPANDNYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPANDNYrm VR256:$src1, addr:$src2)>;
+}
+
// If only AVX1 is supported, we need to handle integer operations with
// floating point instructions since the integer versions aren't available.
let Predicates = [HasAVX1Only] in {
+ def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
(VANDPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
(VORPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
(VXORPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
(VANDNPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
(VANDPSYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
(VORPSYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
(VXORPSYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
(VANDNPSYrm VR256:$src1, addr:$src2)>;
}
@@ -2480,6 +2616,122 @@ let Predicates = [UseSSE2] in {
FR64)>;
}
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
+ (VPANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
+ (VPANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
+ (VPANDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
+ (VPORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
+ (VPORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
+ (VPORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
+ (VPXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
+ (VPXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
+ (VPXORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPANDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPXORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPANDNrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE2] in {
+ def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
+ (PANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
+ (PANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
+ (PANDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
+ (PORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
+ (PORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
+ (PORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
+ (PXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
+ (PXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
+ (PXORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)),
+ (PANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
+ (PANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
+ (PANDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
+ (PORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
+ (PORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
+ (PORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
+ (PXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
+ (PXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
+ (PXORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
+ (PANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
+ (PANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
+ (PANDNrm VR128:$src1, addr:$src2)>;
+}
+
// Patterns for packed operations when we don't have integer type available.
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
(ANDPSrr VR128:$src1, VR128:$src2)>;
@@ -2713,7 +2965,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
!strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
[(set RC:$dst, (OpNode (load addr:$src1)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded]>,
Requires<[target, OptForSize]>;
let isCodeGenOnly = 1, Constraints = "$src1 = $dst", ExeDomain = d in {
@@ -2723,7 +2975,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2777,7 +3029,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], d>, Sched<[sched.Folded, ReadAfterLd]>;
+ [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, ExeDomain = d in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -2787,7 +3039,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, intmemop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -3306,6 +3558,19 @@ def : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}",
let Predicates = [HasAVX, NoVLX] in {
// Additional patterns for other integer sizes.
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVDQArm addr:$src)>;
+ def : Pat<(alignedloadv8i16 addr:$src),
+ (VMOVDQArm addr:$src)>;
+ def : Pat<(alignedloadv16i8 addr:$src),
+ (VMOVDQArm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVDQUrm addr:$src)>;
+ def : Pat<(loadv8i16 addr:$src),
+ (VMOVDQUrm addr:$src)>;
+ def : Pat<(loadv16i8 addr:$src),
+ (VMOVDQUrm addr:$src)>;
+
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
(VMOVDQAmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
@@ -3345,8 +3610,8 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt
@@ -3358,13 +3623,13 @@ defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
SchedWriteVecALU, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
SchedWriteVecALU, 1, NoVLX>;
-defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
+defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
+defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8,
+defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16,
+defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
@@ -3380,13 +3645,13 @@ defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
SchedWriteVecALU, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
SchedWriteVecALU, 0, NoVLX>;
-defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
+defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
+defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
+defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
+defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
@@ -3405,28 +3670,28 @@ defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
- loadv2i64, i128mem, SchedWriteVecIMul.XMM, 0>,
+ load, i128mem, SchedWriteVecIMul.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
- VR256, loadv4i64, i256mem, SchedWriteVecIMul.YMM,
+ VR256, load, i256mem, SchedWriteVecIMul.YMM,
0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
- memopv2i64, i128mem, SchedWriteVecIMul.XMM>;
+ memop, i128mem, SchedWriteVecIMul.XMM>;
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
- loadv2i64, i128mem, SchedWritePSADBW.XMM, 0>,
+ load, i128mem, SchedWritePSADBW.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
- loadv4i64, i256mem, SchedWritePSADBW.YMM, 0>,
+ load, i256mem, SchedWritePSADBW.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
- memopv2i64, i128mem, SchedWritePSADBW.XMM>;
+ memop, i128mem, SchedWritePSADBW.XMM>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
@@ -3453,8 +3718,8 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
- (SrcVT (bitconvert (ld_frag addr:$src2))))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (SrcVT (ld_frag addr:$src2)))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!if(Is2Addr,
@@ -3473,16 +3738,16 @@ multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM,
- DstVT128, SrcVT, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ DstVT128, SrcVT, load, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM,
- DstVT256, SrcVT, loadv2i64, 0>, VEX_4V, VEX_L,
+ DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L,
VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT,
- memopv2i64>;
+ memop>;
}
multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
@@ -3582,7 +3847,7 @@ let Predicates = [HasAVX, prd] in {
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)),
+ (vt128 (OpNode (load addr:$src1),
(i8 imm:$src2))))]>, VEX,
Sched<[sched.XMM.Folded]>, VEX_WIG;
}
@@ -3600,7 +3865,7 @@ let Predicates = [HasAVX2, prd] in {
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)),
+ (vt256 (OpNode (load addr:$src1),
(i8 imm:$src2))))]>, VEX, VEX_L,
Sched<[sched.YMM.Folded]>, VEX_WIG;
}
@@ -3618,7 +3883,7 @@ let Predicates = [UseSSE2] in {
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (vt128 (OpNode (memop addr:$src1),
(i8 imm:$src2))))]>,
Sched<[sched.XMM.Folded]>;
}
@@ -3658,8 +3923,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OutVT (OpNode (ArgVT RC:$src1),
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (ld_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
@@ -3683,53 +3948,53 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OutVT (OpNode (ArgVT RC:$src1),
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (ld_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt
@@ -3754,89 +4019,88 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1,
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt
@@ -3864,7 +4128,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
imm:$src3))]>,
- Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
// Extract
@@ -4155,7 +4419,7 @@ let Predicates = [UseAVX] in {
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(VMOVDI2PDIrm addr:$src)>;
@@ -4180,7 +4444,7 @@ let Predicates = [UseSSE2] in {
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(MOVDI2PDIrm addr:$src)>;
@@ -4335,30 +4599,30 @@ defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(VMOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
(VMOVSHDUPrm addr:$src)>;
def : Pat<(v4i32 (X86Movsldup VR128:$src)),
(VMOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
(VMOVSLDUPrm addr:$src)>;
def : Pat<(v8i32 (X86Movshdup VR256:$src)),
(VMOVSHDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (loadv4i64 addr:$src)))),
+ def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
(VMOVSHDUPYrm addr:$src)>;
def : Pat<(v8i32 (X86Movsldup VR256:$src)),
(VMOVSLDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (loadv4i64 addr:$src)))),
+ def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
(VMOVSLDUPYrm addr:$src)>;
}
let Predicates = [UseSSE3] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(MOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
(MOVSHDUPrm addr:$src)>;
def : Pat<(v4i32 (X86Movsldup VR128:$src)),
(MOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
(MOVSLDUPrm addr:$src)>;
}
@@ -4405,12 +4669,16 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}
let Predicates = [UseSSE3] in {
// No need for aligned memory as this only loads 64 bits.
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+ (MOVDDUPrm addr:$src)>;
}
//===---------------------------------------------------------------------===//
@@ -4453,7 +4721,7 @@ multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
@@ -4504,7 +4772,7 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode,
@@ -4522,7 +4790,7 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
@@ -4580,7 +4848,7 @@ multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
(ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (vt (OpNode (bitconvert (ld_frag addr:$src)))))]>,
+ (vt (OpNode (ld_frag addr:$src))))]>,
Sched<[sched.XMM.Folded]>;
}
@@ -4597,19 +4865,19 @@ multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
(ins i256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
- (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))]>,
+ (vt (OpNode (load addr:$src))))]>,
Sched<[sched.YMM.Folded]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
- loadv2i64>, VEX, VEX_WIG;
+ load>, VEX, VEX_WIG;
defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
- loadv2i64>, VEX, VEX_WIG;
+ load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
- loadv2i64>, VEX, VEX_WIG;
+ load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
@@ -4623,11 +4891,11 @@ let Predicates = [HasAVX2, NoVLX] in {
}
defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
- memopv2i64>;
+ memop>;
defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
- memopv2i64>;
+ memop>;
defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
- memopv2i64>;
+ memop>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -4652,9 +4920,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (DstVT (OpNode (OpVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
@@ -4675,9 +4942,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
@@ -4693,83 +4959,83 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (IntId256 VR256:$src1, (load addr:$src2)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
- VR128, loadv2i64, i128mem,
+ VR128, load, i128mem,
SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG;
defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
- v16i8, VR128, loadv2i64, i128mem,
+ v16i8, VR128, load, i128mem,
SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
}
defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
- VR128, loadv2i64, i128mem,
+ VR128, load, i128mem,
SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
- SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128,
- SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128,
- SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
- SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
- SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
}
}
let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
- v32i8, VR256, loadv4i64, i256mem,
+ v32i8, VR256, load, i256mem,
SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
- loadv4i64, i256mem,
+ load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
- loadv4i64, i256mem,
+ load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
@@ -4790,33 +5056,33 @@ let isCommutable = 0 in {
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
- SchedWriteVecALU.XMM, memopv2i64>;
+ SchedWriteVecALU.XMM, memop>;
defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
- SchedWriteVecALU.XMM, memopv2i64>;
+ SchedWriteVecALU.XMM, memop>;
defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
- SchedWriteVecALU.XMM, memopv2i64>;
+ SchedWriteVecALU.XMM, memop>;
defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
- memopv2i64, i128mem, SchedWriteVarShuffle.XMM>;
+ memop, i128mem, SchedWriteVarShuffle.XMM>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128,
- SchedWritePHAdd.XMM, memopv2i64>;
+ SchedWritePHAdd.XMM, memop>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128,
- SchedWritePHAdd.XMM, memopv2i64>;
+ SchedWritePHAdd.XMM, memop>;
defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
- v16i8, VR128, memopv2i64, i128mem,
+ v16i8, VR128, memop, i128mem,
SchedWriteVecIMul.XMM>;
}
defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
- VR128, memopv2i64, i128mem, SchedWriteVecIMul.XMM>;
+ VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
}
//===---------------------------------------------------------------------===//
@@ -4843,20 +5109,20 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (VT (X86PAlignr RC:$src1,
- (bitconvert (memop_frag addr:$src2)),
+ (memop_frag addr:$src2),
(i8 imm:$src3))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
- defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, loadv2i64, i128mem,
+ defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
- defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, loadv4i64, i256mem,
+ defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
- defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memopv2i64, i128mem,
+ defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
SchedWriteShuffle.XMM>;
//===---------------------------------------------------------------------===//
@@ -4936,34 +5202,72 @@ defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;
defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;
-// AVX2 Patterns
-multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtOp> {
+// Patterns that we also need for any_extend.
+// Any_extend_vector_inreg is currently legalized to zero_extend_vector_inreg.
+multiclass SS41I_pmovx_avx2_patterns_base<string OpcPrefix, SDNode ExtOp> {
// Register-Register patterns
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
- (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
+ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+ def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
+ (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
}
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))),
+
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
+ (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
+ (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
+ }
+
+ // AVX2 Register-Memory patterns
+ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+ def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ }
+
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ }
+}
+
+// AVX2 Patterns
+multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
+ SDNode ExtOp, SDNode InVecOp> :
+ SS41I_pmovx_avx2_patterns_base<OpcPrefix, ExtOp> {
+
+ // Register-Register patterns
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))),
+ def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
- (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 VR128:$src))),
+ def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
(!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
-
- def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
- (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
}
// Simple Register-Memory patterns
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
(!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
}
- let Predicates = [HasAVX, NoVLX] in {
+ let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
@@ -4979,60 +5283,39 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtO
}
// AVX2 Register-Memory patterns
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- }
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}
}
-defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
-defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
+defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
+defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;
+defm : SS41I_pmovx_avx2_patterns_base<"VPMOVZX", anyext>;
// SSE4.1/AVX patterns.
multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
@@ -5082,7 +5365,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in {
@@ -5092,7 +5375,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
@@ -5101,7 +5384,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -5112,7 +5395,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
@@ -5121,7 +5404,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -5132,7 +5415,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
}
}
@@ -5298,8 +5581,8 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
- imm:$src3))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), imm:$src3))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoBWI] in
@@ -5324,8 +5607,8 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
- imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoDQI] in
@@ -5350,8 +5633,8 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
- imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoDQI] in
@@ -5383,7 +5666,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
(X86insertps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
let ExeDomain = SSEPackedSingle in {
@@ -5446,7 +5729,7 @@ let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
@@ -5461,7 +5744,7 @@ let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
@@ -5479,7 +5762,7 @@ let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
(outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
@@ -5494,7 +5777,7 @@ let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
(outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
@@ -5522,7 +5805,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
@@ -5545,7 +5828,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
}
@@ -5846,7 +6129,7 @@ def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM.Folded, ReadAfterLd]>,
+ Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>,
VEX, VEX_WIG;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
@@ -5856,7 +6139,7 @@ def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.YMM.Folded, ReadAfterLd]>,
+ Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>,
VEX, VEX_L, VEX_WIG;
}
@@ -5868,7 +6151,7 @@ def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"ptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
}
// The bit test instructions below are AVX only
@@ -5882,7 +6165,7 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>, VEX;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
}
let Defs = [EFLAGS], Predicates = [HasAVX] in {
@@ -5950,7 +6233,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
(ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (v8i16 (OpNode (v8i16 (bitconvert (ld_frag addr:$src))))))]>,
+ (v8i16 (OpNode (ld_frag addr:$src))))]>,
Sched<[Sched.Folded]>;
}
@@ -5958,10 +6241,10 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
// model, although the naming is misleading.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
- X86phminpos, loadv2i64,
+ X86phminpos, load,
WritePHMINPOS>, VEX, VEX_WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
- X86phminpos, memopv2i64,
+ X86phminpos, memop,
WritePHMINPOS>;
/// SS48I_binop_rm - Simple SSE41 binary operator.
@@ -5983,118 +6266,118 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128,
- loadv2i64, i128mem, SchedWriteVecIMul.XMM, 0>,
+ load, i128mem, SchedWriteVecIMul.XMM, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256,
- loadv4i64, i256mem, SchedWriteVecIMul.YMM, 0>,
+ load, i256mem, SchedWriteVecIMul.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMULDQ : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128,
- memopv2i64, i128mem, SchedWriteVecIMul.XMM, 1>;
+ memop, i128mem, SchedWriteVecIMul.XMM, 1>;
}
let Predicates = [HasAVX, NoVLX] in
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
- loadv2i64, i128mem, SchedWritePMULLD.XMM, 0>,
+ load, i128mem, SchedWritePMULLD.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX] in
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
- loadv4i64, i256mem, SchedWritePMULLD.YMM, 0>,
+ load, i256mem, SchedWritePMULLD.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Predicates = [HasAVX2] in
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
- memopv2i64, i128mem, SchedWritePMULLD.XMM, 1>;
+ memop, i128mem, SchedWritePMULLD.XMM, 1>;
defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
}
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
@@ -6120,9 +6403,8 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (IntId RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
@@ -6148,9 +6430,8 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def BlendCommuteImm2 : SDNodeXForm<imm, [{
@@ -6171,28 +6452,28 @@ def BlendCommuteImm8 : SDNodeXForm<imm, [{
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, loadv2i64, i128mem, 0,
+ VR128, load, i128mem, 0,
SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
}
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, loadv4f32, f128mem, 0,
+ VR128, load, f128mem, 0,
SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, loadv2f64, f128mem, 0,
+ VR128, load, f128mem, 0,
SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, loadv8f32, i256mem, 0,
+ VR256, load, i256mem, 0,
SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, loadv4i64, i256mem, 0,
+ VR256, load, i256mem, 0,
SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
}
@@ -6200,17 +6481,17 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv2i64, i128mem, 1,
+ VR128, memop, i128mem, 1,
SchedWriteMPSAD.XMM>;
}
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv4f32, f128mem, 1,
+ VR128, memop, f128mem, 1,
SchedWriteDPPS.XMM>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv2f64, f128mem, 1,
+ VR128, memop, f128mem, 1,
SchedWriteDPPD.XMM>;
}
@@ -6238,56 +6519,54 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Pattern to commute if load is in first source.
- def : Pat<(OpVT (OpNode (bitconvert (memop_frag addr:$src2)),
- RC:$src1, imm:$src3)),
+ def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
(commuteXForm imm:$src3))>;
}
let Predicates = [HasAVX] in {
defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
- VR128, loadv4f32, f128mem, 0, SSEPackedSingle,
+ VR128, load, f128mem, 0, SSEPackedSingle,
SchedWriteFBlend.XMM, BlendCommuteImm4>,
VEX_4V, VEX_WIG;
defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
- VR256, loadv8f32, f256mem, 0, SSEPackedSingle,
+ VR256, load, f256mem, 0, SSEPackedSingle,
SchedWriteFBlend.YMM, BlendCommuteImm8>,
VEX_4V, VEX_L, VEX_WIG;
defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
- VR128, loadv2f64, f128mem, 0, SSEPackedDouble,
+ VR128, load, f128mem, 0, SSEPackedDouble,
SchedWriteFBlend.XMM, BlendCommuteImm2>,
VEX_4V, VEX_WIG;
defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
- VR256, loadv4f64, f256mem, 0, SSEPackedDouble,
+ VR256, load, f256mem, 0, SSEPackedDouble,
SchedWriteFBlend.YMM, BlendCommuteImm4>,
VEX_4V, VEX_L, VEX_WIG;
defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
- VR128, loadv2i64, i128mem, 0, SSEPackedInt,
+ VR128, load, i128mem, 0, SSEPackedInt,
SchedWriteBlend.XMM, BlendCommuteImm8>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2] in {
defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
- VR256, loadv4i64, i256mem, 0, SSEPackedInt,
+ VR256, load, i256mem, 0, SSEPackedInt,
SchedWriteBlend.YMM, BlendCommuteImm8>,
VEX_4V, VEX_L, VEX_WIG;
}
defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
- VR128, memopv4f32, f128mem, 1, SSEPackedSingle,
+ VR128, memop, f128mem, 1, SSEPackedSingle,
SchedWriteFBlend.XMM, BlendCommuteImm4>;
defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
- VR128, memopv2f64, f128mem, 1, SSEPackedDouble,
+ VR128, memop, f128mem, 1, SSEPackedDouble,
SchedWriteFBlend.XMM, BlendCommuteImm2>;
defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
- VR128, memopv2i64, i128mem, 1, SSEPackedInt,
+ VR128, memop, i128mem, 1, SSEPackedInt,
SchedWriteBlend.XMM, BlendCommuteImm8>;
// For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -6321,20 +6600,20 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
+ (IntId RC:$src1, (mem_frag addr:$src2),
RC:$src3))], SSEPackedInt>, TAPD, VEX_4V,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
- loadv2f64, int_x86_sse41_blendvpd,
+ load, int_x86_sse41_blendvpd,
SchedWriteFVarBlend.XMM>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
loadv4f64, int_x86_avx_blendv_pd_256,
@@ -6342,20 +6621,20 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
- loadv4f32, int_x86_sse41_blendvps,
+ load, int_x86_sse41_blendvps,
SchedWriteFVarBlend.XMM>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
loadv8f32, int_x86_avx_blendv_ps_256,
SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- loadv2i64, int_x86_sse41_pblendvb,
+ load, int_x86_sse41_pblendvb,
SchedWriteVarBlend.XMM>;
}
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
- loadv4i64, int_x86_avx2_pblendvb,
+ load, int_x86_avx2_pblendvb,
SchedWriteVarBlend.YMM>, VEX_L;
}
@@ -6486,18 +6765,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
"\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (mem_frag addr:$src2)), XMM0))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (mem_frag addr:$src2), XMM0))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let ExeDomain = SSEPackedDouble in
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memop, f128mem,
int_x86_sse41_blendvpd, SchedWriteFVarBlend.XMM>;
let ExeDomain = SSEPackedSingle in
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memop, f128mem,
int_x86_sse41_blendvps, SchedWriteFVarBlend.XMM>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memop, i128mem,
int_x86_sse41_pblendvb, SchedWriteVarBlend.XMM>;
// Aliases with the implicit xmm0 argument
@@ -6553,6 +6832,12 @@ let Predicates = [HasAVX2, NoVLX] in {
(VMOVNTDQAYrm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAYrm addr:$src)>;
+ def : Pat<(v8i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAYrm addr:$src)>;
+ def : Pat<(v16i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAYrm addr:$src)>;
+ def : Pat<(v32i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAYrm addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in {
@@ -6562,6 +6847,12 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVNTDQArm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(VMOVNTDQArm addr:$src)>;
+ def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQArm addr:$src)>;
+ def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQArm addr:$src)>;
+ def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQArm addr:$src)>;
}
let Predicates = [UseSSE41] in {
@@ -6571,6 +6862,12 @@ let Predicates = [UseSSE41] in {
(MOVNTDQArm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(MOVNTDQArm addr:$src)>;
+ def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+ (MOVNTDQArm addr:$src)>;
+ def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+ (MOVNTDQArm addr:$src)>;
+ def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+ (MOVNTDQArm addr:$src)>;
}
} // AddedComplexity
@@ -6598,22 +6895,22 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM>;
+ memop, i128mem, SchedWriteVecALU.XMM>;
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
@@ -6628,7 +6925,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
def rm :SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrM.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>;
}
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
@@ -6646,7 +6943,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
def rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrM.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>;
}
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
@@ -6664,7 +6961,7 @@ multiclass SS42AI_pcmpistri<string asm> {
def rm : SS42AI<0x63, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrI.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>;
}
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
@@ -6682,7 +6979,7 @@ multiclass SS42AI_pcmpestri<string asm> {
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrI.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>;
}
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
@@ -6712,7 +7009,7 @@ class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
- Sched<[WriteCRC32.Folded, ReadAfterLd]>;
+ Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
@@ -6764,10 +7061,10 @@ multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
[!if(UsesXMM0,
(set VR128:$dst, (IntId VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
+ (memop addr:$src2), XMM0)),
(set VR128:$dst, (IntId VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (memop addr:$src2))))]>, T8,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
@@ -6783,9 +7080,10 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (memop addr:$src2),
(i8 imm:$src3)))]>, TA,
- Sched<[SchedWriteVecIMul.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecIMul.XMM.Folded,
+ SchedWriteVecIMul.XMM.ReadAfterFold]>;
defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
SchedWriteVecIMul.XMM>;
@@ -6828,46 +7126,46 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, MemOp:$src2), "",
[(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
- Sched<[WriteAESDecEnc.Folded, ReadAfterLd]>;
+ Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
}
}
// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
}
let Predicates = [NoVLX, HasVAES] in {
defm VAESENCY : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesenc_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesenclast_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
defm VAESDECY : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesdec_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesdeclast_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
- int_x86_aesni_aesenc, memopv2i64, 1>;
+ int_x86_aesni_aesenc, memop, 1>;
defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
- int_x86_aesni_aesenclast, memopv2i64, 1>;
+ int_x86_aesni_aesenclast, memop, 1>;
defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
- int_x86_aesni_aesdec, memopv2i64, 1>;
+ int_x86_aesni_aesdec, memop, 1>;
defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
- int_x86_aesni_aesdeclast, memopv2i64, 1>;
+ int_x86_aesni_aesdeclast, memop, 1>;
}
// Perform the AES InvMixColumn Transformation
@@ -6881,7 +7179,7 @@ let Predicates = [HasAVX, HasAES] in {
def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
@@ -6892,7 +7190,7 @@ def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
Sched<[WriteAESIMC.Folded]>;
// AES Round Key Generation Assist
@@ -6907,7 +7205,7 @@ let Predicates = [HasAVX, HasAES] in {
(ins i128mem:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
@@ -6920,7 +7218,7 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>;
//===----------------------------------------------------------------------===//
@@ -6948,12 +7246,12 @@ let Predicates = [NoAVX, HasPCLMUL] in {
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2),
+ (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
imm:$src3))]>,
- Sched<[WriteCLMul.Folded, ReadAfterLd]>;
+ Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
- def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1,
+ def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
(i8 imm:$src3)),
(PCLMULQDQrm VR128:$src1, addr:$src2,
(PCLMULCommuteImm imm:$src3))>;
@@ -6986,7 +7284,7 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set RC:$dst,
(IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
- Sched<[WriteCLMul.Folded, ReadAfterLd]>;
+ Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
// We can commute a load in the first operand by swapping the sources and
// rotating the immediate.
@@ -6996,11 +7294,11 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
}
let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
-defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, loadv2i64,
+defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
int_x86_pclmulqdq>, VEX_4V, VEX_WIG;
let Predicates = [NoVLX, HasVPCLMULQDQ] in
-defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, loadv4i64,
+defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
@@ -7156,11 +7454,11 @@ def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI128 addr:$src)>;
-def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))),
+def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
(VBROADCASTI128 addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTI128 addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI128 addr:$src)>;
}
@@ -7174,11 +7472,11 @@ def : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))),
let Predicates = [HasAVX1Only] in {
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))),
+def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTF128 addr:$src)>;
}
@@ -7194,7 +7492,7 @@ let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f128mem:$src2, u8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteFShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}
// To create a 256-bit all ones value, we should produce VCMPTRUEPS
@@ -7211,7 +7509,7 @@ multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
(!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
- (From (bitconvert (memop_frag addr:$src2))),
+ (From (memop_frag addr:$src2)),
(iPTR imm)),
(!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
@@ -7224,9 +7522,9 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [HasAVX1Only] in {
defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv4i32>;
+ defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>;
+ defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv16i8>;
}
//===----------------------------------------------------------------------===//
@@ -7315,7 +7613,7 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop_f,
- X86MemOperand x86memop_i, PatFrag i_frag,
+ X86MemOperand x86memop_i,
ValueType f_vt, ValueType i_vt,
X86FoldableSchedWrite sched,
X86FoldableSchedWrite varsched> {
@@ -7329,8 +7627,8 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
- (i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V,
- Sched<[varsched.Folded, ReadAfterLd]>;
+ (i_vt (load addr:$src2)))))]>, VEX_4V,
+ Sched<[varsched.Folded, sched.ReadAfterFold]>;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
@@ -7348,18 +7646,18 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- loadv2i64, v4f32, v4i32, SchedWriteFShuffle.XMM,
+ v4f32, v4i32, SchedWriteFShuffle.XMM,
SchedWriteFVarShuffle.XMM>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- loadv4i64, v8f32, v8i32, SchedWriteFShuffle.YMM,
+ v8f32, v8i32, SchedWriteFShuffle.YMM,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- loadv2i64, v2f64, v2i64, SchedWriteFShuffle.XMM,
+ v2f64, v2i64, SchedWriteFShuffle.XMM,
SchedWriteFVarShuffle.XMM>;
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- loadv4i64, v4f64, v4i64, SchedWriteFShuffle.YMM,
+ v4f64, v4i64, SchedWriteFShuffle.YMM,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
@@ -7380,7 +7678,7 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V, VEX_L,
- Sched<[WriteFShuffle256Ld, ReadAfterLd]>;
+ Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}
// Immediate transform to help with commuting.
@@ -7440,8 +7738,7 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- [(set RC:$dst, (X86cvtph2ps (bc_v8i16
- (loadv2i64 addr:$src))))]>,
+ [(set RC:$dst, (X86cvtph2ps (loadv8i16 addr:$src)))]>,
T8PD, VEX, Sched<[sched.Folded]>;
}
@@ -7515,7 +7812,7 @@ let Predicates = [HasF16C, NoVLX] in {
/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, X86FoldableSchedWrite sched,
- RegisterClass RC, PatFrag memop_frag,
+ RegisterClass RC,
X86MemOperand x86memop, SDNodeXForm commuteXForm> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
@@ -7529,22 +7826,20 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>, VEX_4V;
+ (OpVT (OpNode RC:$src1, (load addr:$src2), imm:$src3)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
// Pattern to commute if load is in first source.
- def : Pat<(OpVT (OpNode (bitconvert (memop_frag addr:$src2)),
- RC:$src1, imm:$src3)),
+ def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
(commuteXForm imm:$src3))>;
}
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
- SchedWriteBlend.XMM, VR128, loadv2i64, i128mem,
+ SchedWriteBlend.XMM, VR128, i128mem,
BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
- SchedWriteBlend.YMM, VR256, loadv4i64, i256mem,
+ SchedWriteBlend.YMM, VR256, i256mem,
BlendCommuteImm8>, VEX_L;
// For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -7743,6 +8038,8 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVDDUPrr VR128:$src)>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
(VMOVDDUPrm addr:$src)>;
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+ (VMOVDDUPrm addr:$src)>;
}
let Predicates = [HasAVX1Only] in {
@@ -7778,7 +8075,7 @@ let Predicates = [HasAVX1Only] in {
// VPERM - Permute instructions
//
-multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+multiclass avx2_perm<bits<8> opc, string OpcodeStr,
ValueType OpVT, X86FoldableSchedWrite Sched,
X86MemOperand memOp> {
let Predicates = [HasAVX2, NoVLX] in {
@@ -7795,16 +8092,14 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1,
- (bitconvert (mem_frag addr:$src2)))))]>,
- Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L;
+ (load addr:$src2))))]>,
+ Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
}
}
-defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteVarShuffle256,
- i256mem>;
+defm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFVarShuffle256,
- f256mem>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
ValueType OpVT, X86FoldableSchedWrite Sched,
@@ -7824,7 +8119,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
[(set VR256:$dst,
(OpVT (X86VPermi (mem_frag addr:$src1),
(i8 imm:$src2))))]>,
- Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L;
+ Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
}
}
@@ -7849,7 +8144,7 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
(i8 imm:$src3)))]>,
- Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
let Predicates = [HasAVX2] in
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
@@ -7869,14 +8164,14 @@ let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i128mem:$src2, u8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}
let Predicates = [HasAVX2, NoVLX] in {
defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv4i32>;
+ defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>;
+ defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv16i8>;
}
//===----------------------------------------------------------------------===//
@@ -7941,7 +8236,7 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
ValueType MaskVT, string BlendStr, ValueType ZeroVT> {
// masked store
- def: Pat<(X86mstore addr:$ptr, (MaskVT RC:$mask), (VT RC:$src)),
+ def: Pat<(X86mstore (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
(!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
// masked load
def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), undef)),
@@ -8035,8 +8330,9 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1,
- (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
- VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded, ReadAfterLd]>;
+ (vt128 (load addr:$src2)))))]>,
+ VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
+ SchedWriteVarVecShift.XMM.ReadAfterFold]>;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -8048,8 +8344,9 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1,
- (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, ReadAfterLd]>;
+ (vt256 (load addr:$src2)))))]>,
+ VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
+ SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}
let Predicates = [HasAVX2, NoVLX] in {
@@ -8061,13 +8358,11 @@ let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)),
(VPSRAVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vsrav VR128:$src1,
- (bitconvert (loadv2i64 addr:$src2)))),
+ def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))),
(VPSRAVDrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)),
(VPSRAVDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86vsrav VR256:$src1,
- (bitconvert (loadv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86vsrav VR256:$src1, (load addr:$src2))),
(VPSRAVDYrm VR256:$src1, addr:$src2)>;
}
@@ -8132,51 +8427,6 @@ let Predicates = [UseAVX2] in {
}
//===----------------------------------------------------------------------===//
-// Extra selection patterns for f128, f128mem
-
-// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
-def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 VR128:$src), VR128))>;
-def : Pat<(store (f128 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, (COPY_TO_REGCLASS (f128 VR128:$src), VR128))>;
-
-def : Pat<(alignedloadf128 addr:$src),
- (COPY_TO_REGCLASS (MOVAPSrm addr:$src), VR128)>;
-def : Pat<(loadf128 addr:$src),
- (COPY_TO_REGCLASS (MOVUPSrm addr:$src), VR128)>;
-
-// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
-def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (ANDPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (ANDPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (ORPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (ORPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (XORPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (XORPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-//===----------------------------------------------------------------------===//
// GFNI instructions
//===----------------------------------------------------------------------===//
@@ -8194,8 +8444,8 @@ multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
[(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
- (bitconvert (MemOpFrag addr:$src2)))))]>,
- Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>, T8PD;
+ (MemOpFrag addr:$src2))))]>,
+ Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD;
}
}
@@ -8212,9 +8462,9 @@ multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1,
- (bitconvert (MemOpFrag addr:$src2)),
+ (MemOpFrag addr:$src2),
imm:$src3)))], SSEPackedInt>,
- Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
}
}
@@ -8222,24 +8472,24 @@ multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
let Constraints = "$src1 = $dst",
Predicates = [HasGFNI, UseSSE2] in
defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
- VR128, loadv2i64, i128mem, 1>;
+ VR128, load, i128mem, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
defm V##NAME : GF2P8AFFINE_rmi<Op, "v"##OpStr, v16i8, OpNode, VR128,
- loadv2i64, i128mem>, VEX_4V, VEX_W;
+ load, i128mem>, VEX_4V, VEX_W;
defm V##NAME##Y : GF2P8AFFINE_rmi<Op, "v"##OpStr, v32i8, OpNode, VR256,
- loadv4i64, i256mem>, VEX_4V, VEX_L, VEX_W;
+ load, i256mem>, VEX_4V, VEX_L, VEX_W;
}
}
// GF2P8MULB
let Constraints = "$src1 = $dst",
Predicates = [HasGFNI, UseSSE2] in
-defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memopv2i64,
+defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
i128mem, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
- defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, loadv2i64,
+ defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
i128mem>, VEX_4V;
- defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, loadv4i64,
+ defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
i256mem>, VEX_4V, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
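The X86mstore pattern above now takes its operands in (value, pointer, mask) order; the selected VPMASKMOV instruction is unchanged. As a point of reference, a minimal scalar model of the masked-store lane rule these patterns ultimately select (illustrative C++, not LLVM code):

#include <cstddef>
#include <cstdint>

// Each lane is written only when the sign bit of its mask element is set;
// unselected lanes leave the destination memory untouched.
void maskedStore32(int32_t *dst, const int32_t *src, const int32_t *mask,
                   size_t lanes) {
  for (size_t i = 0; i < lanes; ++i)
    if (mask[i] < 0)
      dst[i] = src[i];
}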
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 023137634df1..7cd63a6dd820 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -16,7 +16,7 @@
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCL] in {
def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (shl GR8:$src1, CL))]>;
@@ -29,7 +29,7 @@ def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t{%cl, $dst|$dst, cl}",
[(set GR64:$dst, (shl GR64:$src1, CL))]>;
-} // Uses = [CL]
+} // Uses = [CL], SchedRW
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
@@ -64,11 +64,9 @@ def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
} // hasSideEffects = 0
} // Constraints = "$src = $dst", SchedRW
-
-let SchedRW = [WriteShiftLd, WriteRMW] in {
// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
// using CL?
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in {
def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
"shl{b}\t{%cl, $dst|$dst, cl}",
[(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -85,6 +83,8 @@ def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
[(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src),
"shl{b}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -120,7 +120,7 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
} // SchedRW
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCL] in {
def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (srl GR8:$src1, CL))]>;
@@ -166,8 +166,7 @@ def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, cl}",
[(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -184,6 +183,8 @@ def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
[(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, u8imm:$src),
"shr{b}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -219,7 +220,7 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
} // SchedRW
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCL] in {
def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (sra GR8:$src1, CL))]>;
@@ -268,8 +269,7 @@ def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, cl}",
[(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -286,6 +286,8 @@ def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
[(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src),
"sar{b}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -325,9 +327,9 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
-let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in {
-let Uses = [CL, EFLAGS] in {
+let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in {
def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{%cl, $dst|$dst, cl}", []>;
def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
@@ -357,7 +359,7 @@ def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
} // Uses = [EFLAGS]
-let Uses = [CL, EFLAGS] in {
+let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in {
def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{%cl, $dst|$dst, cl}", []>;
def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
@@ -389,7 +391,7 @@ def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
} // Constraints = "$src = $dst"
-let SchedRW = [WriteShiftLd, WriteRMW], mayStore = 1 in {
+let SchedRW = [WriteRotateLd, WriteRMW], mayStore = 1 in {
let Uses = [EFLAGS] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", []>;
@@ -428,7 +430,7 @@ def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt),
Requires<[In64BitMode]>;
} // Uses = [EFLAGS]
-let Uses = [CL, EFLAGS] in {
+let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCLLd, WriteRMW] in {
def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t{%cl, $dst|$dst, cl}", []>;
def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
@@ -452,9 +454,9 @@ def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
} // SchedRW
} // hasSideEffects = 0
-let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in {
// FIXME: provide shorter instructions when imm8 == 1
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteRotateCL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"rol{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (rotl GR8:$src1, CL))]>;
@@ -498,8 +500,7 @@ def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, cl}",
[(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -514,6 +515,8 @@ def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
[(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteRotateLd, WriteRMW] in {
def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1),
"rol{b}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
@@ -548,8 +551,8 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
Requires<[In64BitMode]>;
} // SchedRW
-let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in {
+let Uses = [CL], SchedRW = [WriteRotateCL] in {
def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (rotr GR8:$src1, CL))]>;
@@ -595,8 +598,7 @@ def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (rotl GR64:$src1, (i8 63)))]>;
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, cl}",
[(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -611,6 +613,8 @@ def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
[(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteRotateLd, WriteRMW] in {
def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src),
"ror{b}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -822,6 +826,8 @@ def ROT64L2R_imm8 : SDNodeXForm<imm, [{
return getI8Imm(64 - N->getZExtValue(), SDLoc(N));
}]>;
+// NOTE: We use WriteShift for these rotates as they avoid the stalls
+// of many of the older x86 rotate instructions.
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let hasSideEffects = 0 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
@@ -844,12 +850,12 @@ let hasSideEffects = 0 in {
def rm : I<0xF7, MRMSrcMem4VOp3,
(outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX, Sched<[WriteShiftLd,
+ VEX, Sched<[WriteShift.Folded,
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ WriteShift.ReadAfterFold]>;
}
}
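The NOTE above concerns the BMI2 form defined by bmi_rotate (RORX): it takes an immediate count and does not write EFLAGS, so it avoids the flag-related stalls of the legacy rotate encodings. A plain C++ rotate like the following is the sort of source pattern a compiler lowers to ROL/ROR, or to RORX when BMI2 is available (illustration only):

#include <cstdint>

// x86 masks the rotate count to 5 bits for 32-bit operands; the
// (32 - n) & 31 term also keeps the shift amounts defined for n == 0.
uint32_t rotl32(uint32_t x, unsigned n) {
  n &= 31;
  return (x << n) | (x >> ((32 - n) & 31));
}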
diff --git a/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td
index 322bdb74e2de..c417dc99b84d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -13,126 +13,42 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// No op bitconverts
-//===----------------------------------------------------------------------===//
-
-// Bitcasts between 128-bit vector types. Return the original type since
-// no instruction is needed for the conversion
-def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
-
-// Bitcasts between 256-bit vector types. Return the original type since
-// no instruction is needed for the conversion
-def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
-
-// Bitcasts between 512-bit vector types. Return the original type since
-// no instruction is needed for the conversion.
-def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
-
-
-//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//
-// A vector extract of the first f32/f64 position is a subregister copy
-def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
- (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
-def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
- (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
+let Predicates = [NoAVX512] in {
+ // A vector extract of the first f32/f64 position is a subregister copy
+ def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
+ def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
+}
+
+let Predicates = [HasAVX512] in {
+ // A vector extract of the first f32/f64 position is a subregister copy
+ def : Pat<(f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X)>;
+ def : Pat<(f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X)>;
+}
-// Implicitly promote a 32-bit scalar to a vector.
-def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (COPY_TO_REGCLASS FR32:$src, VR128)>;
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (COPY_TO_REGCLASS FR64:$src, VR128)>;
+let Predicates = [NoVLX] in {
+ // Implicitly promote a 32-bit scalar to a vector.
+ def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
+ // Implicitly promote a 64-bit scalar to a vector.
+ def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
+}
+let Predicates = [HasVLX] in {
+ // Implicitly promote a 32-bit scalar to a vector.
+ def : Pat<(v4f32 (scalar_to_vector FR32X:$src)),
+ (COPY_TO_REGCLASS FR32X:$src, VR128X)>;
+ // Implicitly promote a 64-bit scalar to a vector.
+ def : Pat<(v2f64 (scalar_to_vector FR64X:$src)),
+ (COPY_TO_REGCLASS FR64X:$src, VR128X)>;
+}
//===----------------------------------------------------------------------===//
// Subvector tricks
@@ -509,3 +425,85 @@ let Predicates = [HasBWI, HasVLX] in {
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
(i8 60)), (i8 60))>;
}
+
+//===----------------------------------------------------------------------===//
+// Extra selection patterns for f128, f128mem
+
+// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
+let Predicates = [NoAVX] in {
+def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f128 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (MOVAPSrm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (MOVUPSrm addr:$src)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
+def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f128 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+}
+
+let Predicates = [HasVLX] in {
+def : Pat<(alignedstore (f128 VR128X:$src), addr:$dst),
+ (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
+def : Pat<(store (f128 VR128X:$src), addr:$dst),
+ (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (VMOVAPSZ128rm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (VMOVUPSZ128rm addr:$src)>;
+}
+
+let Predicates = [UseSSE1] in {
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
+ (ANDPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
+ (ANDPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
+ (ORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
+ (ORPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
+ (XORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
+ (XORPSrr VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasAVX] in {
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(f128 (X86fand VR128:$src1, (loadf128 addr:$src2))),
+ (VANDPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
+ (VANDPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, (loadf128 addr:$src2))),
+ (VORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
+ (VORPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, (loadf128 addr:$src2))),
+ (VXORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
+ (VXORPSrr VR128:$src1, VR128:$src2)>;
+}
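The f128 patterns above exist because a few float128 operations are pure bit manipulation on the 128-bit storage; a hedged sketch of the kind of operation that reaches X86fand (the struct layout here is illustrative, assuming the usual little-endian two-word representation, and is not LLVM's):

#include <cstdint>

// binary128 keeps its sign in bit 127, the top bit of the high word, so
// fabs on the raw storage is a single AND with a constant mask -- the shape
// the X86fand/andps patterns above select.
struct F128Bits { uint64_t lo, hi; };

F128Bits fabs128(F128Bits x) {
  x.hi &= ~(UINT64_C(1) << 63);
  return x;
}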
diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
index ff3e3be48a24..9d810a675e3b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
@@ -11,32 +11,32 @@
//
//===----------------------------------------------------------------------===//
-multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
+multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
- Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
+ [(set VR128:$dst, (Int (load addr:$src)))]>, XOP,
+ Sched<[SchedWritePHAdd.XMM.Folded, SchedWritePHAdd.XMM.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
- defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
- defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
- defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
- defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
- defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
- defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
- defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
- defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
- defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
- defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
- defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
- defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
- defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
- defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
- defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd>;
}
// Scalar load 2 addr operand instructions
@@ -48,47 +48,47 @@ multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set VR128:$dst, (Int mem_cpat:$src))]>, XOP,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
- PatFrag memop, X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set VR128:$dst, (Int (load addr:$src)))]>, XOP,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
- PatFrag memop, X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched> {
def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[sched]>;
def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set VR256:$dst, (Int (load addr:$src)))]>, XOP, VEX_L,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedSingle in {
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
- defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
+ defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps,
SchedWriteFRnd.XMM>;
- defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
+ defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256,
SchedWriteFRnd.YMM>;
}
let ExeDomain = SSEPackedDouble in {
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
- defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
+ defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd,
SchedWriteFRnd.XMM>;
- defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256,
SchedWriteFRnd.YMM>;
}
@@ -105,15 +105,15 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
- (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>;
+ (vt128 (load addr:$src2)))))]>,
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold]>;
def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
+ (vt128 (OpNode (vt128 (load addr:$src1)),
(vt128 VR128:$src2))))]>,
- XOP, Sched<[sched.Folded, ReadAfterLd]>;
+ XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
@@ -150,8 +150,8 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>,
- XOP, Sched<[sched.Folded, ReadAfterLd]>;
+ (vt128 (OpNode (vt128 (load addr:$src1)), imm:$src2)))]>,
+ XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
@@ -181,8 +181,8 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
- VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ (Int VR128:$src1, (load addr:$src2),
+ VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
@@ -260,9 +260,9 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
- (vt128 (bitconvert (loadv2i64 addr:$src2))),
+ (vt128 (load addr:$src2)),
imm:$cc)))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
@@ -274,12 +274,12 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>,
+ []>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
- def : Pat<(OpNode (bitconvert (loadv2i64 addr:$src2)),
+ def : Pat<(OpNode (load addr:$src2),
(vt128 VR128:$src1), imm:$cc),
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
(CommuteVPCOMCC imm:$cc))>;
@@ -310,21 +310,21 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ (vt128 (load addr:$src3)))))]>,
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
+ (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)),
(vt128 VR128:$src3))))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd,
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
@@ -350,25 +350,26 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, RC:$src2))))]>, XOP_4V,
Sched<[sched]>;
+ // FIXME: This pattern can't match.
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
(X86andnp (load addr:$src3), RC:$src2))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, (load addr:$src2)))))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd,
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
@@ -385,6 +386,48 @@ let ExeDomain = SSEPackedInt in {
SchedWriteShuffle.YMM>, VEX_L;
}
+let Predicates = [HasXOP] in {
+ def : Pat<(v16i8 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v8i16 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+
+ def : Pat<(or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, (bc_v16i8 (loadv2i64 addr:$src2)))),
+ (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
+ def : Pat<(or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, (bc_v8i16 (loadv2i64 addr:$src2)))),
+ (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
+ def : Pat<(or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, (bc_v4i32 (loadv2i64 addr:$src2)))),
+ (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+ def : Pat<(v32i8 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(v16i16 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(v8i32 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+
+ def : Pat<(or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, (bc_v32i8 (loadv4i64 addr:$src2)))),
+ (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
+ def : Pat<(or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, (bc_v16i16 (loadv4i64 addr:$src2)))),
+ (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
+ def : Pat<(or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
+}
+
multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
X86MemOperand intmemop, X86MemOperand fpmemop,
ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag,
@@ -401,10 +444,9 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
- (VT (X86vpermil2 RC:$src1, RC:$src2,
- (bitconvert (IntLdFrag addr:$src3)),
+ (VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3),
(i8 imm:$src4))))]>, VEX_W,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
@@ -412,11 +454,11 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
@@ -437,10 +479,10 @@ let ExeDomain = SSEPackedDouble in {
let ExeDomain = SSEPackedSingle in {
defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
- v4f32, loadv4f32, loadv2i64,
+ v4f32, loadv4f32, loadv4i32,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
- v8f32, loadv8f32, loadv4i64,
+ v8f32, loadv8f32, loadv8i32,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
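The VPCMOV patterns added above match the canonical bitwise-select DAG, (or (and sel, a), (andnp sel, b)); lane-wise it behaves like this small model (illustrative C++, not LLVM code):

#include <cstdint>

// Each selector bit picks the corresponding bit of a (bit set) or b (bit
// clear); VPCMOV applies the same rule across the whole XMM/YMM register.
uint64_t pcmovModel(uint64_t a, uint64_t b, uint64_t sel) {
  return (sel & a) | (~sel & b);
}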
diff --git a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 36d36cb11d72..c20336387b2d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -89,6 +89,8 @@ private:
MachineFunction &MF) const;
bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -114,8 +116,10 @@ private:
bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectShift(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
- bool selectSDiv(MachineInstr &I, MachineRegisterInfo &MRI,
- MachineFunction &MF) const;
+ bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
// emit insert subreg instruction and insert it before MachineInstr &I
bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
@@ -362,11 +366,14 @@ bool X86InstructionSelector::select(MachineInstr &I,
return selectAnyext(I, MRI, MF);
case TargetOpcode::G_ICMP:
return selectCmp(I, MRI, MF);
+ case TargetOpcode::G_FCMP:
+ return selectFCmp(I, MRI, MF);
case TargetOpcode::G_UADDE:
return selectUadde(I, MRI, MF);
case TargetOpcode::G_UNMERGE_VALUES:
return selectUnmergeValues(I, MRI, MF, CoverageInfo);
case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
return selectMergeValues(I, MRI, MF, CoverageInfo);
case TargetOpcode::G_EXTRACT:
return selectExtract(I, MRI, MF);
@@ -382,7 +389,12 @@ bool X86InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_LSHR:
return selectShift(I, MRI, MF);
case TargetOpcode::G_SDIV:
- return selectSDiv(I, MRI, MF);
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM:
+ return selectDivRem(I, MRI, MF);
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ return selectIntrinsicWSideEffects(I, MRI, MF);
}
return false;
@@ -967,6 +979,98 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I,
return true;
}
+bool X86InstructionSelector::selectFCmp(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");
+
+ unsigned LhsReg = I.getOperand(2).getReg();
+ unsigned RhsReg = I.getOperand(3).getReg();
+ CmpInst::Predicate Predicate =
+ (CmpInst::Predicate)I.getOperand(1).getPredicate();
+
+ // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+ static const uint16_t SETFOpcTable[2][3] = {
+ {X86::SETEr, X86::SETNPr, X86::AND8rr},
+ {X86::SETNEr, X86::SETPr, X86::OR8rr}};
+ const uint16_t *SETFOpc = nullptr;
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_OEQ:
+ SETFOpc = &SETFOpcTable[0][0];
+ break;
+ case CmpInst::FCMP_UNE:
+ SETFOpc = &SETFOpcTable[1][0];
+ break;
+ }
+
+ // Compute the opcode for the CMP instruction.
+ unsigned OpCmp;
+ LLT Ty = MRI.getType(LhsReg);
+ switch (Ty.getSizeInBits()) {
+ default:
+ return false;
+ case 32:
+ OpCmp = X86::UCOMISSrr;
+ break;
+ case 64:
+ OpCmp = X86::UCOMISDrr;
+ break;
+ }
+
+ unsigned ResultReg = I.getOperand(0).getReg();
+ RBI.constrainGenericRegister(
+ ResultReg,
+ *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
+ if (SETFOpc) {
+ MachineInstr &CmpInst =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
+ .addReg(LhsReg)
+ .addReg(RhsReg);
+
+ unsigned FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
+ unsigned FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
+ MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SETFOpc[0]), FlagReg1);
+ MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SETFOpc[1]), FlagReg2);
+ MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SETFOpc[2]), ResultReg)
+ .addReg(FlagReg1)
+ .addReg(FlagReg2);
+ constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);
+
+ I.eraseFromParent();
+ return true;
+ }
+
+ X86::CondCode CC;
+ bool SwapArgs;
+ std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+ unsigned Opc = X86::getSETFromCond(CC);
+
+ if (SwapArgs)
+ std::swap(LhsReg, RhsReg);
+
+ // Emit a compare of LHS/RHS.
+ MachineInstr &CmpInst =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
+ .addReg(LhsReg)
+ .addReg(RhsReg);
+
+ MachineInstr &Set =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc), ResultReg);
+ constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
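Why FCMP_OEQ needs the SETE/SETNP/AND sequence selected above: ucomiss reports an unordered comparison by setting ZF, PF and CF together, so ZF alone cannot separate "equal" from "NaN operand". A flag-level model (illustrative C++, not part of the selector):

#include <cmath>

bool fcmpOEQModel(float a, float b) {
  bool Unordered = std::isnan(a) || std::isnan(b);
  bool ZF = Unordered || (a == b);  // ZF after ucomiss
  bool PF = Unordered;              // PF flags the unordered case
  return ZF && !PF;                 // SETE, SETNP, then AND8rr
}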
bool X86InstructionSelector::selectUadde(MachineInstr &I,
MachineRegisterInfo &MRI,
MachineFunction &MF) const {
@@ -1246,7 +1350,8 @@ bool X86InstructionSelector::selectUnmergeValues(
bool X86InstructionSelector::selectMergeValues(
MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF,
CodeGenCoverage &CoverageInfo) const {
- assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES) &&
+ assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
+ I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
"unexpected instruction");
// Split to inserts.
@@ -1485,23 +1590,33 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
return true;
}
-bool X86InstructionSelector::selectSDiv(MachineInstr &I,
- MachineRegisterInfo &MRI,
- MachineFunction &MF) const {
-
- assert(I.getOpcode() == TargetOpcode::G_SDIV && "unexpected instruction");
+bool X86InstructionSelector::selectDivRem(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ // The implementation of this function is taken from X86FastISel.
+ assert((I.getOpcode() == TargetOpcode::G_SDIV ||
+ I.getOpcode() == TargetOpcode::G_SREM ||
+ I.getOpcode() == TargetOpcode::G_UDIV ||
+ I.getOpcode() == TargetOpcode::G_UREM) &&
+ "unexpected instruction");
const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned DividentReg = I.getOperand(1).getReg();
- const unsigned DiviserReg = I.getOperand(2).getReg();
+ const unsigned Op1Reg = I.getOperand(1).getReg();
+ const unsigned Op2Reg = I.getOperand(2).getReg();
const LLT RegTy = MRI.getType(DstReg);
- assert(RegTy == MRI.getType(DividentReg) &&
- RegTy == MRI.getType(DiviserReg) &&
+ assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
"Arguments and return value types must match");
const RegisterBank &RegRB = *RBI.getRegBank(DstReg, MRI, TRI);
+ if (RegRB.getID() != X86::GPRRegBankID)
+ return false;
+ const static unsigned NumTypes = 4; // i8, i16, i32, i64
+ const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
+ const static bool S = true; // IsSigned
+ const static bool U = false; // !IsSigned
+ const static unsigned Copy = TargetOpcode::COPY;
// For the X86 IDIV instruction, in most cases the dividend
// (numerator) must be in a specific register pair highreg:lowreg,
// producing the quotient in lowreg and the remainder in highreg.
@@ -1510,56 +1625,182 @@ bool X86InstructionSelector::selectSDiv(MachineInstr &I,
// exception is i8, where the dividend is defined as a single register rather
// than a register pair, and we therefore directly sign-extend the dividend
// into lowreg, instead of copying, and ignore the highreg.
- const static struct SDivEntry {
+ const static struct DivRemEntry {
+ // The following portion depends only on the data type.
unsigned SizeInBits;
- unsigned QuotientReg;
- unsigned DividentRegUpper;
- unsigned DividentRegLower;
- unsigned OpSignExtend;
- unsigned OpCopy;
- unsigned OpDiv;
- } OpTable[] = {
- {8, X86::AL, X86::NoRegister, X86::AX, 0, X86::MOVSX16rr8,
- X86::IDIV8r}, // i8
- {16, X86::AX, X86::DX, X86::AX, X86::CWD, TargetOpcode::COPY,
- X86::IDIV16r}, // i16
- {32, X86::EAX, X86::EDX, X86::EAX, X86::CDQ, TargetOpcode::COPY,
- X86::IDIV32r}, // i32
- {64, X86::RAX, X86::RDX, X86::RAX, X86::CQO, TargetOpcode::COPY,
- X86::IDIV64r} // i64
+ unsigned LowInReg; // low part of the register pair
+ unsigned HighInReg; // high part of the register pair
+ // The following portion depends on both the data type and the operation.
+ struct DivRemResult {
+ unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
+ unsigned OpSignExtend; // Opcode for sign-extending lowreg into
+ // highreg, or copying a zero into highreg.
+ unsigned OpCopy; // Opcode for copying dividend into lowreg, or
+ // zero/sign-extending into lowreg for i8.
+ unsigned DivRemResultReg; // Register containing the desired result.
+ bool IsOpSigned; // Whether to use signed or unsigned form.
+ } ResultTable[NumOps];
+ } OpTable[NumTypes] = {
+ {8,
+ X86::AX,
+ 0,
+ {
+ {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
+ {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
+ {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U}, // UDiv
+ {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U}, // URem
+ }}, // i8
+ {16,
+ X86::AX,
+ X86::DX,
+ {
+ {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv
+ {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
+ }}, // i16
+ {32,
+ X86::EAX,
+ X86::EDX,
+ {
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
+ }}, // i32
+ {64,
+ X86::RAX,
+ X86::RDX,
+ {
+ {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S}, // SDiv
+ {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S}, // SRem
+ {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
+ {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
+ }}, // i64
};
- if (RegRB.getID() != X86::GPRRegBankID)
+ auto OpEntryIt = std::find_if(std::begin(OpTable), std::end(OpTable),
+ [RegTy](const DivRemEntry &El) {
+ return El.SizeInBits == RegTy.getSizeInBits();
+ });
+ if (OpEntryIt == std::end(OpTable))
return false;
- auto SDivEntryIt = std::find_if(
- std::begin(OpTable), std::end(OpTable), [RegTy](const SDivEntry &El) {
- return El.SizeInBits == RegTy.getSizeInBits();
- });
+ unsigned OpIndex;
+ switch (I.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected div/rem opcode");
+ case TargetOpcode::G_SDIV:
+ OpIndex = 0;
+ break;
+ case TargetOpcode::G_SREM:
+ OpIndex = 1;
+ break;
+ case TargetOpcode::G_UDIV:
+ OpIndex = 2;
+ break;
+ case TargetOpcode::G_UREM:
+ OpIndex = 3;
+ break;
+ }
- if (SDivEntryIt == std::end(OpTable))
- return false;
+ const DivRemEntry &TypeEntry = *OpEntryIt;
+ const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
const TargetRegisterClass *RegRC = getRegClass(RegTy, RegRB);
- if (!RBI.constrainGenericRegister(DividentReg, *RegRC, MRI) ||
- !RBI.constrainGenericRegister(DiviserReg, *RegRC, MRI) ||
+ if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
+ !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
<< " operand\n");
return false;
}
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SDivEntryIt->OpCopy),
- SDivEntryIt->DividentRegLower)
- .addReg(DividentReg);
- if (SDivEntryIt->DividentRegUpper != X86::NoRegister)
+ // Move op1 into low-order input register.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
+ TypeEntry.LowInReg)
+ .addReg(Op1Reg);
+ // Zero-extend or sign-extend into high-order input register.
+ if (OpEntry.OpSignExtend) {
+ if (OpEntry.IsOpSigned)
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(OpEntry.OpSignExtend));
+ else {
+ unsigned Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
+ Zero32);
+
+ // Copy the zero into the appropriate sub/super/identical physical
+ // register. Unfortunately the operations needed are not uniform enough
+ // to fit neatly into the table above.
+ if (RegTy.getSizeInBits() == 16) {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
+ TypeEntry.HighInReg)
+ .addReg(Zero32, 0, X86::sub_16bit);
+ } else if (RegTy.getSizeInBits() == 32) {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
+ TypeEntry.HighInReg)
+ .addReg(Zero32);
+ } else if (RegTy.getSizeInBits() == 64) {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
+ .addImm(0)
+ .addReg(Zero32)
+ .addImm(X86::sub_32bit);
+ }
+ }
+ }
+ // Generate the DIV/IDIV instruction.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem))
+ .addReg(Op2Reg);
+ // For i8 remainder, we can't reference ah directly, as we'll end
+ // up with bogus copies like %r9b = COPY %ah. Reference ax
+ // instead to prevent ah references in a rex instruction.
+ //
+ // The current assumption of the fast register allocator is that isel
+ // won't generate explicit references to the GR8_NOREX registers. If
+ // the allocator and/or the backend get enhanced to be more robust in
+ // that regard, this can be, and should be, removed.
+ if ((I.getOpcode() == Instruction::SRem ||
+ I.getOpcode() == Instruction::URem) &&
+ OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
+ unsigned SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
+ unsigned ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
+ .addReg(X86::AX);
+
+ // Shift AX right by 8 bits instead of using AH.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
+ ResultSuperReg)
+ .addReg(SourceSuperReg)
+ .addImm(8);
+
+ // Now reference the 8-bit subreg of the result.
BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(SDivEntryIt->OpSignExtend));
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SDivEntryIt->OpDiv))
- .addReg(DiviserReg);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
- DstReg)
- .addReg(SDivEntryIt->QuotientReg);
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(DstReg)
+ .addImm(0)
+ .addReg(ResultSuperReg)
+ .addImm(X86::sub_8bit);
+ } else {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
+ DstReg)
+ .addReg(OpEntry.DivRemResultReg);
+ }
+ I.eraseFromParent();
+ return true;
+}
+
+bool X86InstructionSelector::selectIntrinsicWSideEffects(
+ MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const {
+
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
+ "unexpected instruction");
+
+ if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
+ return false;
+
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TRAP));
I.eraseFromParent();
return true;
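
To make the register conventions the new selector encodes easier to follow, here is a small, self-contained C++ sketch of the same table-driven lookup. The mnemonics and structure below are placeholders of my own, not the real X86:: enumerators or the actual DivRemEntry layout; the point is only the shape: find the row for the operand width, index the column by opcode, then read off how the high input register is prepared and where the result lands (quotient in *ax, remainder in *dx). The real table also covers the 8- and 16-bit widths, which is why the code above needs the AH/SHR16ri workaround for i8 remainders.

#include <algorithm>
#include <iostream>
#include <iterator>

// Toy mirror of the DivRemEntry table: outer table keyed on operand width,
// inner table keyed on opcode (SDiv, SRem, UDiv, URem). Signed divides
// sign-extend the dividend into the high register (cdq/cqo), unsigned ones
// zero it; quotients land in *ax, remainders in *dx.
struct ToyDivRemEntry {
  struct Result {
    const char *SetupHighReg; // cdq/cqo for signed, zeroing for unsigned
    const char *DivInsn;      // idiv/div of the divisor
    const char *ResultReg;    // where the selected result lives
  };
  unsigned SizeInBits;
  Result ResultTable[4]; // SDiv, SRem, UDiv, URem
};

static const ToyDivRemEntry OpTable[] = {
    {32,
     {{"cdq", "idivl", "eax"},
      {"cdq", "idivl", "edx"},
      {"xor edx,edx", "divl", "eax"},
      {"xor edx,edx", "divl", "edx"}}},
    {64,
     {{"cqo", "idivq", "rax"},
      {"cqo", "idivq", "rdx"},
      {"xor rdx,rdx", "divq", "rax"},
      {"xor rdx,rdx", "divq", "rdx"}}},
};

int main() {
  const unsigned SizeInBits = 64; // pretend the G_UREM operands are s64
  const unsigned OpIndex = 3;     // 0 = SDiv, 1 = SRem, 2 = UDiv, 3 = URem
  auto It = std::find_if(std::begin(OpTable), std::end(OpTable),
                         [&](const ToyDivRemEntry &E) {
                           return E.SizeInBits == SizeInBits;
                         });
  if (It == std::end(OpTable))
    return 1;
  const ToyDivRemEntry::Result &R = It->ResultTable[OpIndex];
  std::cout << R.SetupHighReg << "; " << R.DivInsn << " <divisor>; "
            << "result in " << R.ResultReg << "\n";
  return 0;
}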
diff --git a/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
index 6c7fb9c339ac..28940754a203 100644
--- a/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -463,7 +463,7 @@ static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
// {DiffToJump,...,VF/2-1,VF,...,DiffToJump+VF-1}.
// Imm variable sets the offset amount. The result of the
// function is stored inside ShuffleMask vector and it built as described in
-// the begin of the description. AlignDirection is a boolean that indecat the
+// the begin of the description. AlignDirection is a boolean that indicates the
// direction of the alignment. (false - align to the "right" side while true -
// align to the "left" side)
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
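
A rough, single-lane model of the mask the comment above describes may help; this is an assumption on my part (the real DecodePALIGNRMask also has to handle 128-bit lane splitting and element widths), but the core idea is that element i of the result selects element Imm + i out of the two concatenated sources:

#include <cstdint>
#include <iostream>
#include <vector>

// Single-lane sketch: indices >= VF refer to the second source in the
// concatenation {Src1, Src2}. AlignDirection in the real helper just flips
// which side the offset is measured from.
static std::vector<uint32_t> buildPalignrLikeMask(unsigned VF, unsigned Imm) {
  std::vector<uint32_t> Mask(VF);
  for (unsigned i = 0; i != VF; ++i)
    Mask[i] = (Imm + i) % (2 * VF);
  return Mask;
}

int main() {
  for (uint32_t M : buildPalignrLikeMask(8, 3))
    std::cout << M << ' '; // prints: 3 4 5 6 7 8 9 10
  std::cout << '\n';
}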
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 2dd60a1b8b5a..151e1b9136c4 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -20,18 +20,18 @@
namespace llvm {
enum IntrinsicType : uint16_t {
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASS, FPCLASSS,
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
- INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP_IMM8,
- CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
- CVTPD2PS, CVTPD2PS_MASK,
+ INTR_TYPE_3OP_IMM8,
+ CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
+ CVTPD2PS, CVTPD2PS_MASK, CVTPD2PS_RND_MASK,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK,
- FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_SCALAR,
IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG,
+ TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2I_MASK,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, GATHER_AVX2,
@@ -64,11 +64,6 @@ struct IntrinsicData {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithChain[] = {
- X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
-
X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, X86::VPGATHERDDrm, 0),
X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, X86::VPGATHERDDYrm, 0),
X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, X86::VGATHERDPDrm, 0),
@@ -120,6 +115,31 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
@@ -229,6 +249,31 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dps_512, SCATTER, X86::VSCATTERDPSZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpd_512, SCATTER, X86::VSCATTERQPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
+
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
@@ -270,9 +315,6 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0),
X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0),
-
- X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
- X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
X86_INTRINSIC_DATA(xgetbv, XGETBV, X86::XGETBV, 0),
X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
};
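
These tables are only useful because they stay sorted (the header's own comment insists on alphabetical order, which lines up with the numeric intrinsic ids), so the lookup helpers further down can binary-search them. A hedged, standalone sketch of that lookup pattern, with made-up ids rather than real Intrinsic:: values and with the exact helper signatures treated as illustrative:

#include <algorithm>
#include <iostream>
#include <iterator>

// Toy stand-in for IntrinsicData: an id plus the data the lowering code needs.
struct ToyIntrinsicData {
  unsigned Id;
  unsigned Type; // stands in for IntrinsicType
  unsigned Opc0, Opc1;
  bool operator<(unsigned RHS) const { return Id < RHS; }
};

// Must be kept sorted by Id, exactly like the real tables.
static const ToyIntrinsicData Table[] = {
    {10, 1, 100, 0}, {20, 2, 200, 201}, {30, 1, 300, 0}};

static const ToyIntrinsicData *lookup(unsigned IntNo) {
  const ToyIntrinsicData *It =
      std::lower_bound(std::begin(Table), std::end(Table), IntNo);
  if (It != std::end(Table) && It->Id == IntNo)
    return It;
  return nullptr;
}

int main() {
  if (const ToyIntrinsicData *D = lookup(20))
    std::cout << "type " << D->Type << ", opc0 " << D->Opc0 << ", opc1 "
              << D->Opc1 << "\n";
}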
@@ -294,6 +336,8 @@ static const IntrinsicData* getIntrinsicWithChain(unsigned IntNo) {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithoutChain[] = {
+ X86_INTRINSIC_DATA(addcarry_32, ADX, X86ISD::ADC, X86ISD::ADD),
+ X86_INTRINSIC_DATA(addcarry_64, ADX, X86ISD::ADC, X86ISD::ADD),
X86_INTRINSIC_DATA(avx_addsub_pd_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
X86_INTRINSIC_DATA(avx_addsub_ps_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
@@ -325,10 +369,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
@@ -369,10 +409,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -384,14 +420,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
@@ -402,12 +438,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_b, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_d, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_q, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_w, INTR_TYPE_2OP, X86ISD::KADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FADDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
@@ -467,13 +507,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CONFLICT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), //er
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2I_MASK,
+ X86ISD::CVTP2SI, X86ISD::MCVTP2SI),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK,
- X86ISD::VFPROUND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, CVTPD2PS_MASK,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_RND_MASK,
ISD::FP_ROUND, X86ISD::VFPROUND_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0),
@@ -481,8 +521,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, CVTPD2I_MASK,
+ X86ISD::CVTP2UI, X86ISD::MCVTP2UI),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_512, INTR_TYPE_1OP_MASK,
@@ -531,8 +571,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VFPROUNDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::VFPEXTS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2I_MASK,
+ X86ISD::CVTTP2SI, X86ISD::MCVTTP2SI),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
@@ -541,8 +581,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2I_MASK,
+ X86ISD::CVTTP2UI, X86ISD::MCVTTP2UI),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
@@ -677,144 +717,114 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FMULS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_128, INTR_TYPE_2OP_MASK,
- X86ISD::MULTISHIFT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_256, INTR_TYPE_2OP_MASK,
- X86ISD::MULTISHIFT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_512, INTR_TYPE_2OP_MASK,
- X86ISD::MULTISHIFT, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
@@ -871,38 +881,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
-
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
-
- X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_128, CMP_MASK,
- X86ISD::VPSHUFBITQMB, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_256, CMP_MASK,
- X86ISD::VPSHUFBITQMB, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_512, CMP_MASK,
- X86ISD::VPSHUFBITQMB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
@@ -921,25 +905,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
X86ISD::VFIXUPIMMS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
-
X86_INTRINSIC_DATA(avx512_max_pd_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_max_ps_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_min_pd_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND),
@@ -967,30 +932,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_pmul_hr_sw_512, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(avx512_pmulh_w_512, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx512_pmulhu_w_512, INTR_TYPE_2OP, ISD::MULHU, 0),
- X86_INTRINSIC_DATA(avx512_prol_d_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_d_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_d_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_q_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_q_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_q_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prolv_d_128, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_d_256, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_d_512, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_q_128, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_q_256, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_q_512, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_pror_d_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_d_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_d_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_q_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_q_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_q_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_prorv_d_128, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_d_256, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_d_512, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_q_128, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_q_256, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_q_512, INTR_TYPE_2OP, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_pmultishift_qb_128, INTR_TYPE_2OP, X86ISD::MULTISHIFT, 0),
+ X86_INTRINSIC_DATA(avx512_pmultishift_qb_256, INTR_TYPE_2OP, X86ISD::MULTISHIFT, 0),
+ X86_INTRINSIC_DATA(avx512_pmultishift_qb_512, INTR_TYPE_2OP, X86ISD::MULTISHIFT, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx512_pshuf_b_512, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
@@ -1068,14 +1012,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_sub_ps_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
- X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_f32, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_f64, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_pd_512, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
@@ -1124,26 +1068,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_128 , IFMA_OP, X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_256 , IFMA_OP, X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_512 , IFMA_OP, X86ISD::VPMADD52L, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_d_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_d_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_d_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_q_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_q_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_q_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_w_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_w_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_w_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_d_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_d_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_d_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_q_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_q_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_q_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_w_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_w_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_w_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_vpshufbitqmb_128, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
+ X86_INTRINSIC_DATA(avx512_vpshufbitqmb_256, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
+ X86_INTRINSIC_DATA(avx512_vpshufbitqmb_512, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
+ X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
+ X86_INTRINSIC_DATA(bmi_bzhi_64, INTR_TYPE_2OP, X86ISD::BZHI, 0),
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
@@ -1151,6 +1082,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse_comile_ss, COMI, X86ISD::COMI, ISD::SETLE),
X86_INTRINSIC_DATA(sse_comilt_ss, COMI, X86ISD::COMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse_cvtss2si, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+ X86_INTRINSIC_DATA(sse_cvtss2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+ X86_INTRINSIC_DATA(sse_cvttss2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
+ X86_INTRINSIC_DATA(sse_cvttss2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse_max_ss, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),
@@ -1174,8 +1109,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_cvtpd2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
X86_INTRINSIC_DATA(sse2_cvtps2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvtsd2si, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvtsd2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvttsd2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse2_max_sd, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
@@ -1184,10 +1123,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(sse2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
@@ -1209,10 +1144,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(sse2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(sse2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
@@ -1241,6 +1172,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(subborrow_32, ADX, X86ISD::SBB, X86ISD::SUB),
+ X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB),
X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
@@ -1280,14 +1213,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpermil2ps_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),
- X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotdi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(xop_vprotq, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotqi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(xop_vprotw, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotwi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
X86_INTRINSIC_DATA(xop_vpshab, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
X86_INTRINSIC_DATA(xop_vpshad, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
X86_INTRINSIC_DATA(xop_vpshaq, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
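
A recurring pattern in the rewritten entries above (the cvtt*2si, cvtpd2ps and pmov families, and the new ADX rows) is that the last two macro arguments now carry a primary node and a secondary node: the first for the plain form, the second for the variant that takes a rounding operand or a mask. The sketch below is my own toy rendering of that dispatch, with placeholder node ids instead of the real ISD/X86ISD enumerators:

#include <iostream>

// Placeholder node ids; the real entries use enumerators such as
// CVTTS2SI (Opc0) and CVTTS2SI_RND (Opc1) from the table above.
enum ToyNode : unsigned { NODE_NONE = 0, NODE_PLAIN = 1, NODE_RND = 2 };

struct ToyEntry {
  unsigned Opc0; // node for the no-rounding / unmasked form
  unsigned Opc1; // node for the rounding-arg / masked form, 0 if none
};

// Sketch of the dispatch: if the call carries a non-default rounding operand
// (or a mask) and the entry provides Opc1, use it; otherwise fall back to Opc0.
static unsigned pickNode(const ToyEntry &E, bool HasRoundingOrMask) {
  if (HasRoundingOrMask && E.Opc1 != NODE_NONE)
    return E.Opc1;
  return E.Opc0;
}

int main() {
  ToyEntry Cvtt{NODE_PLAIN, NODE_RND}; // mirrors {CVTTS2SI, CVTTS2SI_RND}
  std::cout << pickNode(Cvtt, false) << ' ' << pickNode(Cvtt, true) << '\n'; // 1 2
}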
diff --git a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index d372cada8de8..4a49fa68dd06 100644
--- a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -133,7 +133,8 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
// Shifts and SDIV
- getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR, G_SDIV})
+ getActionDefinitionsBuilder(
+ {G_SHL, G_LSHR, G_ASHR, G_SDIV, G_SREM, G_UDIV, G_UREM})
.legalFor({s8, s16, s32})
.clampScalar(0, s8, s32);
}
@@ -219,13 +220,27 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0);
+ getActionDefinitionsBuilder(G_FPTOSI)
+ .legalForCartesianProduct({s32, s64})
+ .clampScalar(1, s32, s64)
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
+ .widenScalarToNextPow2(1);
+
// Comparison
setAction({G_ICMP, 1, s64}, Legal);
+ getActionDefinitionsBuilder(G_FCMP)
+ .legalForCartesianProduct({s8}, {s32, s64})
+ .clampScalar(0, s8, s8)
+ .clampScalar(1, s32, s64)
+ .widenScalarToNextPow2(1);
+
// Shifts and SDIV
- getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR, G_SDIV})
- .legalFor({s8, s16, s32, s64})
- .clampScalar(0, s8, s64);
+ getActionDefinitionsBuilder(
+ {G_SHL, G_LSHR, G_ASHR, G_SDIV, G_SREM, G_UDIV, G_UREM})
+ .legalFor({s8, s16, s32, s64})
+ .clampScalar(0, s8, s64);
// Merge/Unmerge
setAction({G_MERGE_VALUES, s128}, Legal);
@@ -256,7 +271,7 @@ void X86LegalizerInfo::setLegalizerInfoSSE1() {
// Merge/Unmerge
for (const auto &Ty : {v4s32, v2s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
setAction({G_MERGE_VALUES, 1, s64}, Legal);
@@ -292,17 +307,20 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() {
setAction({G_FPEXT, s64}, Legal);
setAction({G_FPEXT, 1, s32}, Legal);
+ setAction({G_FPTRUNC, s32}, Legal);
+ setAction({G_FPTRUNC, 1, s64}, Legal);
+
// Constants
setAction({TargetOpcode::G_FCONSTANT, s64}, Legal);
// Merge/Unmerge
for (const auto &Ty :
{v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
for (const auto &Ty : {v16s8, v8s16, v4s32, v2s64}) {
- setAction({G_MERGE_VALUES, 1, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
setAction({G_UNMERGE_VALUES, Ty}, Legal);
}
}
@@ -349,12 +367,12 @@ void X86LegalizerInfo::setLegalizerInfoAVX() {
// Merge/Unmerge
for (const auto &Ty :
{v32s8, v64s8, v16s16, v32s16, v8s32, v16s32, v4s64, v8s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
for (const auto &Ty :
{v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) {
- setAction({G_MERGE_VALUES, 1, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
setAction({G_UNMERGE_VALUES, Ty}, Legal);
}
}
@@ -382,11 +400,11 @@ void X86LegalizerInfo::setLegalizerInfoAVX2() {
// Merge/Unmerge
for (const auto &Ty : {v64s8, v32s16, v16s32, v8s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
for (const auto &Ty : {v32s8, v16s16, v8s32, v4s64}) {
- setAction({G_MERGE_VALUES, 1, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
setAction({G_UNMERGE_VALUES, Ty}, Legal);
}
}
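
For readers less familiar with GlobalISel's rule builders, the rule added above, legalFor({s8, s16, s32, s64}).clampScalar(0, s8, s64), says that the listed scalar widths are legal and anything else is clamped into that range. The toy model below only illustrates that clamping semantics; it is not the real LegalityQuery machinery:

#include <iostream>
#include <set>

// A scalar width in the legal set is Legal, anything narrower is widened to
// the lower clamp bound, anything wider is narrowed to the upper bound.
struct ToyRule {
  std::set<unsigned> LegalBits;
  unsigned MinBits, MaxBits;
  const char *decide(unsigned Bits) const {
    if (LegalBits.count(Bits))
      return "Legal";
    if (Bits < MinBits)
      return "WidenScalar";
    if (Bits > MaxBits)
      return "NarrowScalar";
    return "Lower/Custom";
  }
};

int main() {
  ToyRule DivRem{{8, 16, 32, 64}, 8, 64};
  std::cout << "s1   -> " << DivRem.decide(1) << '\n';   // WidenScalar (to s8)
  std::cout << "s32  -> " << DivRem.decide(32) << '\n';  // Legal
  std::cout << "s128 -> " << DivRem.decide(128) << '\n'; // NarrowScalar (to s64)
}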
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index d38c7b497965..2816f8c62bfb 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -132,6 +132,9 @@ MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
// Handle dllimport linkage.
Name += "__imp_";
break;
+ case X86II::MO_COFFSTUB:
+ Name += ".refptr.";
+ break;
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
Suffix = "$non_lazy_ptr";
@@ -160,6 +163,17 @@ MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
switch (MO.getTargetFlags()) {
default:
break;
+ case X86II::MO_COFFSTUB: {
+ MachineModuleInfoCOFF &MMICOFF =
+ MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
+ MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
+ if (!StubSym.getPointer()) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym = MachineModuleInfoImpl::StubValueTy(
+ AsmPrinter.getSymbol(MO.getGlobal()), true);
+ }
+ break;
+ }
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
MachineModuleInfoImpl::StubValueTy &StubSym =
@@ -191,6 +205,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// These affect the name of the symbol, not any suffix.
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
+ case X86II::MO_COFFSTUB:
break;
case X86II::MO_TLVP:
@@ -512,7 +527,7 @@ ReSimplify:
}
case X86::CLEANUPRET: {
- // Replace CATCHRET with the appropriate RET.
+ // Replace CLEANUPRET with the appropriate RET.
OutMI = MCInst();
OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
break;
@@ -584,54 +599,6 @@ ReSimplify:
case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
- // Atomic load and store require a separate pseudo-inst because Acquire
- // implies mayStore and Release implies mayLoad; fix these to regular MOV
- // instructions here
- case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
- case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
- case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
- case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
- case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
- case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
- case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
- case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
- case X86::RELEASE_MOV8mi: OutMI.setOpcode(X86::MOV8mi); goto ReSimplify;
- case X86::RELEASE_MOV16mi: OutMI.setOpcode(X86::MOV16mi); goto ReSimplify;
- case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
- case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
- case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
- case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify;
- case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
- case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify;
- case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
- case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify;
- case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
- case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify;
- case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
- case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify;
- case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
- case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify;
- case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
- case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify;
- case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
- case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify;
- case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
- case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify;
- case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
- case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify;
- case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
- case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify;
- case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
- case X86::RELEASE_XOR64mr: OutMI.setOpcode(X86::XOR64mr); goto ReSimplify;
- case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify;
- case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify;
- case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify;
- case X86::RELEASE_INC64m: OutMI.setOpcode(X86::INC64m); goto ReSimplify;
- case X86::RELEASE_DEC8m: OutMI.setOpcode(X86::DEC8m); goto ReSimplify;
- case X86::RELEASE_DEC16m: OutMI.setOpcode(X86::DEC16m); goto ReSimplify;
- case X86::RELEASE_DEC32m: OutMI.setOpcode(X86::DEC32m); goto ReSimplify;
- case X86::RELEASE_DEC64m: OutMI.setOpcode(X86::DEC64m); goto ReSimplify;
-
// We don't currently select the correct instruction form for instructions
// which have a short %eax, etc. form. Handle this by custom lowering, for
// now.
@@ -946,7 +913,7 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
break;
case MachineOperand::MO_Register:
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpoline())
+ if (Subtarget->useRetpolineIndirectCalls())
report_fatal_error("Lowering register statepoints with retpoline not "
"yet implemented.");
CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
@@ -1103,7 +1070,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
EmitAndCountInstruction(
MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpoline())
+ if (Subtarget->useRetpolineIndirectCalls())
report_fatal_error(
"Lowering patchpoint with retpoline not yet implemented.");
EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
@@ -1412,7 +1379,7 @@ PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
static const Constant *getConstantFromPool(const MachineInstr &MI,
const MachineOperand &Op) {
- if (!Op.isCPI())
+ if (!Op.isCPI() || Op.getOffset() != 0)
return nullptr;
ArrayRef<MachineConstantPoolEntry> Constants =
@@ -1424,7 +1391,7 @@ static const Constant *getConstantFromPool(const MachineInstr &MI,
if (ConstantEntry.isMachineConstantPoolEntry())
return nullptr;
- auto *C = dyn_cast<Constant>(ConstantEntry.Val.ConstVal);
+ const Constant *C = ConstantEntry.Val.ConstVal;
assert((!C || ConstantEntry.getType() == C->getType()) &&
"Expected a constant of the same type!");
return C;
@@ -1515,27 +1482,35 @@ static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
return Comment;
}
+static void printConstant(const APInt &Val, raw_ostream &CS) {
+ if (Val.getBitWidth() <= 64) {
+ CS << Val.getZExtValue();
+ } else {
+ // print multi-word constant as (w0,w1)
+ CS << "(";
+ for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
+ if (i > 0)
+ CS << ",";
+ CS << Val.getRawData()[i];
+ }
+ CS << ")";
+ }
+}
+
+static void printConstant(const APFloat &Flt, raw_ostream &CS) {
+ SmallString<32> Str;
+ // Force scientific notation to distinguish from integers.
+ Flt.toString(Str, 0, 0);
+ CS << Str;
+}
+
static void printConstant(const Constant *COp, raw_ostream &CS) {
if (isa<UndefValue>(COp)) {
CS << "u";
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
- if (CI->getBitWidth() <= 64) {
- CS << CI->getZExtValue();
- } else {
- // print multi-word constant as (w0,w1)
- const auto &Val = CI->getValue();
- CS << "(";
- for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
- if (i > 0)
- CS << ",";
- CS << Val.getRawData()[i];
- }
- CS << ")";
- }
+ printConstant(CI->getValue(), CS);
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
- SmallString<32> Str;
- CF->getValueAPF().toString(Str);
- CS << Str;
+ printConstant(CF->getValueAPF(), CS);
} else {
CS << "?";
}
@@ -1558,6 +1533,9 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
case X86::SEH_StackAlloc:
XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
break;
+ case X86::SEH_StackAlign:
+ XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
+ break;
case X86::SEH_SetFrame:
assert(MI->getOperand(1).getImm() == 0 &&
".cv_fpo_setframe takes no offset");
@@ -1617,6 +1595,18 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
}
}
+static unsigned getRegisterWidth(const MCOperandInfo &Info) {
+ if (Info.RegClass == X86::VR128RegClassID ||
+ Info.RegClass == X86::VR128XRegClassID)
+ return 128;
+ if (Info.RegClass == X86::VR256RegClassID ||
+ Info.RegClass == X86::VR256XRegClassID)
+ return 256;
+ if (Info.RegClass == X86::VR512RegClassID)
+ return 512;
+ llvm_unreachable("Unknown register class!");
+}
+
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI =
@@ -1720,41 +1710,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- case X86::MOVGOT64r: {
- // Materializes the GOT for the 64-bit large code model.
- MCSymbol *DotSym = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(DotSym);
-
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned ScratchReg = MI->getOperand(1).getReg();
- MCSymbol *GOTSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
-
- // .LtmpN: leaq .LtmpN(%rip), %dst
- const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
- EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
- .addReg(DstReg) // dest
- .addReg(X86::RIP) // base
- .addImm(1) // scale
- .addReg(0) // index
- .addExpr(DotExpr) // disp
- .addReg(0)); // seg
-
- // movq $_GLOBAL_OFFSET_TABLE_ - .LtmpN, %scratch
- const MCExpr *GOTSymExpr = MCSymbolRefExpr::create(GOTSym, OutContext);
- const MCExpr *GOTDiffExpr =
- MCBinaryExpr::createSub(GOTSymExpr, DotExpr, OutContext);
- EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
- .addReg(ScratchReg) // dest
- .addExpr(GOTDiffExpr)); // disp
-
- // addq %scratch, %dst
- EmitAndCountInstruction(MCInstBuilder(X86::ADD64rr)
- .addReg(DstReg) // dest
- .addReg(DstReg) // dest
- .addReg(ScratchReg)); // src
- return;
- }
-
case X86::ADD32ri: {
// Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
@@ -1835,6 +1790,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::SEH_SaveReg:
case X86::SEH_SaveXMM:
case X86::SEH_StackAlloc:
+ case X86::SEH_StackAlign:
case X86::SEH_SetFrame:
case X86::SEH_PushFrame:
case X86::SEH_EndPrologue:
@@ -1901,8 +1857,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 64> Mask;
- DecodePSHUFBMask(C, Mask);
+ DecodePSHUFBMask(C, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
!EnablePrintSchedInfo);
@@ -1973,8 +1930,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
- DecodeVPERMILPMask(C, ElSize, Mask);
+ DecodeVPERMILPMask(C, ElSize, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
!EnablePrintSchedInfo);
@@ -2004,8 +1962,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(6);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
- DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
+ DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
!EnablePrintSchedInfo);
@@ -2021,8 +1980,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(6);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
- DecodeVPPERMMask(C, Mask);
+ DecodeVPPERMMask(C, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
!EnablePrintSchedInfo);
@@ -2129,11 +2089,11 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (i != 0 || l != 0)
CS << ",";
if (CDS->getElementType()->isIntegerTy())
- CS << CDS->getElementAsInteger(i);
- else if (CDS->getElementType()->isFloatTy())
- CS << CDS->getElementAsFloat(i);
- else if (CDS->getElementType()->isDoubleTy())
- CS << CDS->getElementAsDouble(i);
+ printConstant(CDS->getElementAsAPInt(i), CS);
+ else if (CDS->getElementType()->isHalfTy() ||
+ CDS->getElementType()->isFloatTy() ||
+ CDS->getElementType()->isDoubleTy())
+ printConstant(CDS->getElementAsAPFloat(i), CS);
else
CS << "?";
}
@@ -2155,6 +2115,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
break;
+ case X86::MOVDDUPrm:
+ case X86::VMOVDDUPrm:
+ case X86::VMOVDDUPZ128rm:
case X86::VBROADCASTSSrm:
case X86::VBROADCASTSSYrm:
case X86::VBROADCASTSSZ128m:
@@ -2191,6 +2154,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
int NumElts;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
+ case X86::MOVDDUPrm: NumElts = 2; break;
+ case X86::VMOVDDUPrm: NumElts = 2; break;
+ case X86::VMOVDDUPZ128rm: NumElts = 2; break;
case X86::VBROADCASTSSrm: NumElts = 4; break;
case X86::VBROADCASTSSYrm: NumElts = 8; break;
case X86::VBROADCASTSSZ128m: NumElts = 4; break;
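The printConstant helpers added above emit constants wider than 64 bits as a parenthesized, least-significant-first list of 64-bit words, which is what shows up in the asm comments for broadcast and shuffle masks. A minimal standalone sketch of that formatting, using plain uint64_t words instead of APInt (printWideConstant is a hypothetical name, not part of the patch):

#include <cstdint>
#include <cstdio>

// Print a constant given as raw 64-bit words, least-significant word first,
// mirroring the "(w0,w1)" style used by printConstant(const APInt &, ...).
static void printWideConstant(const uint64_t *Words, unsigned NumWords) {
  if (NumWords == 1) {                 // fits in 64 bits: plain integer
    std::printf("%llu", (unsigned long long)Words[0]);
    return;
  }
  std::printf("(");
  for (unsigned i = 0; i != NumWords; ++i) {
    if (i)
      std::printf(",");
    std::printf("%llu", (unsigned long long)Words[i]);
  }
  std::printf(")");
}

int main() {
  const uint64_t V[2] = {1, 2};        // a 128-bit value: low word 1, high word 2
  printWideConstant(V, 2);             // prints "(1,2)"
  std::printf("\n");
  return 0;
}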
diff --git a/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp b/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp
index df3abb17014d..5c09597d0442 100644
--- a/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -79,53 +79,46 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::TEST8ri:
case X86::TEST16ri:
case X86::TEST32ri:
- case X86::TEST32i32:
- case X86::TEST64i32:
case X86::TEST64ri32:
case X86::TEST8mr:
case X86::TEST16mr:
case X86::TEST32mr:
case X86::TEST64mr:
- case X86::AND16i16:
case X86::AND16ri:
case X86::AND16ri8:
case X86::AND16rm:
case X86::AND16rr:
- case X86::AND32i32:
case X86::AND32ri:
case X86::AND32ri8:
case X86::AND32rm:
case X86::AND32rr:
- case X86::AND64i32:
case X86::AND64ri32:
case X86::AND64ri8:
case X86::AND64rm:
case X86::AND64rr:
- case X86::AND8i8:
case X86::AND8ri:
case X86::AND8rm:
case X86::AND8rr:
return true;
- case X86::CMP16i16:
case X86::CMP16ri:
case X86::CMP16ri8:
case X86::CMP16rm:
case X86::CMP16rr:
- case X86::CMP32i32:
+ case X86::CMP16mr:
case X86::CMP32ri:
case X86::CMP32ri8:
case X86::CMP32rm:
case X86::CMP32rr:
- case X86::CMP64i32:
+ case X86::CMP32mr:
case X86::CMP64ri32:
case X86::CMP64ri8:
case X86::CMP64rm:
case X86::CMP64rr:
- case X86::CMP8i8:
+ case X86::CMP64mr:
case X86::CMP8ri:
case X86::CMP8rm:
case X86::CMP8rr:
- case X86::ADD16i16:
+ case X86::CMP8mr:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri8_DB:
@@ -133,7 +126,6 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::ADD16rm:
case X86::ADD16rr:
case X86::ADD16rr_DB:
- case X86::ADD32i32:
case X86::ADD32ri:
case X86::ADD32ri8:
case X86::ADD32ri8_DB:
@@ -141,7 +133,6 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::ADD32rm:
case X86::ADD32rr:
case X86::ADD32rr_DB:
- case X86::ADD64i32:
case X86::ADD64ri32:
case X86::ADD64ri32_DB:
case X86::ADD64ri8:
@@ -149,28 +140,21 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::ADD64rm:
case X86::ADD64rr:
case X86::ADD64rr_DB:
- case X86::ADD8i8:
- case X86::ADD8mi:
- case X86::ADD8mr:
case X86::ADD8ri:
case X86::ADD8rm:
case X86::ADD8rr:
- case X86::SUB16i16:
case X86::SUB16ri:
case X86::SUB16ri8:
case X86::SUB16rm:
case X86::SUB16rr:
- case X86::SUB32i32:
case X86::SUB32ri:
case X86::SUB32ri8:
case X86::SUB32rm:
case X86::SUB32rr:
- case X86::SUB64i32:
case X86::SUB64ri32:
case X86::SUB64ri8:
case X86::SUB64rm:
case X86::SUB64rr:
- case X86::SUB8i8:
case X86::SUB8ri:
case X86::SUB8rm:
case X86::SUB8rr:
diff --git a/contrib/llvm/lib/Target/X86/X86MacroFusion.h b/contrib/llvm/lib/Target/X86/X86MacroFusion.h
index 13fa2d78a018..97ef1d6d3b61 100644
--- a/contrib/llvm/lib/Target/X86/X86MacroFusion.h
+++ b/contrib/llvm/lib/Target/X86/X86MacroFusion.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_X86_X86MACROFUSION_H
+#define LLVM_LIB_TARGET_X86_X86MACROFUSION_H
+
#include "llvm/CodeGen/MachineScheduler.h"
namespace llvm {
@@ -23,3 +26,5 @@ std::unique_ptr<ScheduleDAGMutation>
createX86MacroFusionDAGMutation();
} // end namespace llvm
+
+#endif
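The header now gains a proper include guard around the createX86MacroFusionDAGMutation declaration. For orientation, here is a sketch of how such a mutation is typically hooked into the machine scheduler; it mirrors the usual createMachineScheduler override pattern, but createSchedulerWithFusion is an illustrative stand-in, not code from this patch:

#include "X86MacroFusion.h"
#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

// Build a scheduling DAG and attach the macro-fusion mutation so that
// fusable pairs (e.g. CMP/TEST followed by a conditional branch) are kept
// adjacent by the scheduler.
static ScheduleDAGInstrs *createSchedulerWithFusion(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  DAG->addMutation(createX86MacroFusionDAGMutation());
  return DAG;
}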
diff --git a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 42db51b3cf01..b56d02b6bfb6 100644
--- a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -510,12 +510,16 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
MemOpNo += X86II::getOperandBias(Desc);
+ // Do not call chooseBestLEA if there was no matching LEA
+ auto Insns = LEAs.find(getMemOpKey(MI, MemOpNo));
+ if (Insns == LEAs.end())
+ continue;
+
// Get the best LEA instruction to replace address calculation.
MachineInstr *DefMI;
int64_t AddrDispShift;
int Dist;
- if (!chooseBestLEA(LEAs[getMemOpKey(MI, MemOpNo)], MI, DefMI, AddrDispShift,
- Dist))
+ if (!chooseBestLEA(Insns->second, MI, DefMI, AddrDispShift, Dist))
continue;
// If LEA occurs before current instruction, we can freely replace
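The hunk above switches from the map's operator[] to find() so that a memory operand with no recorded LEA is skipped instead of handing chooseBestLEA a freshly default-constructed empty list. A minimal standalone sketch of that lookup-before-use pattern (std::map and the type names here are stand-ins for illustration only):

#include <map>
#include <string>
#include <vector>

using MemOpKey = std::string;          // stand-in for the real memory-operand key
using LEAList = std::vector<int>;      // stand-in for the list of candidate LEAs

// Return the candidate list for Key, or nullptr when there is none.
// find() does not insert on a miss, unlike operator[], so callers can
// simply continue to the next instruction when nullptr is returned.
static const LEAList *lookupLEAs(const std::map<MemOpKey, LEAList> &LEAs,
                                 const MemOpKey &Key) {
  auto It = LEAs.find(Key);
  if (It == LEAs.end())
    return nullptr;
  return &It->second;
}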
diff --git a/contrib/llvm/lib/Target/X86/X86PfmCounters.td b/contrib/llvm/lib/Target/X86/X86PfmCounters.td
index 093fbafa3fba..a1a4210b5ebf 100644
--- a/contrib/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/contrib/llvm/lib/Target/X86/X86PfmCounters.td
@@ -11,67 +11,216 @@
//
//===----------------------------------------------------------------------===//
-let SchedModel = SandyBridgeModel in {
-def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def SBPort0Counter : PfmIssueCounter<SBPort0, ["uops_dispatched_port:port_0"]>;
-def SBPort1Counter : PfmIssueCounter<SBPort1, ["uops_dispatched_port:port_1"]>;
-def SBPort23Counter : PfmIssueCounter<SBPort23,
- ["uops_dispatched_port:port_2",
- "uops_dispatched_port:port_3"]>;
-def SBPort4Counter : PfmIssueCounter<SBPort4, ["uops_dispatched_port:port_4"]>;
-def SBPort5Counter : PfmIssueCounter<SBPort5, ["uops_dispatched_port:port_5"]>;
-}
-
-let SchedModel = HaswellModel in {
-def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def HWPort0Counter : PfmIssueCounter<HWPort0, ["uops_dispatched_port:port_0"]>;
-def HWPort1Counter : PfmIssueCounter<HWPort1, ["uops_dispatched_port:port_1"]>;
-def HWPort2Counter : PfmIssueCounter<HWPort2, ["uops_dispatched_port:port_2"]>;
-def HWPort3Counter : PfmIssueCounter<HWPort3, ["uops_dispatched_port:port_3"]>;
-def HWPort4Counter : PfmIssueCounter<HWPort4, ["uops_dispatched_port:port_4"]>;
-def HWPort5Counter : PfmIssueCounter<HWPort5, ["uops_dispatched_port:port_5"]>;
-def HWPort6Counter : PfmIssueCounter<HWPort6, ["uops_dispatched_port:port_6"]>;
-def HWPort7Counter : PfmIssueCounter<HWPort7, ["uops_dispatched_port:port_7"]>;
-}
-
-let SchedModel = BroadwellModel in {
-def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def BWPort0Counter : PfmIssueCounter<BWPort0, ["uops_executed_port:port_0"]>;
-def BWPort1Counter : PfmIssueCounter<BWPort1, ["uops_executed_port:port_1"]>;
-def BWPort2Counter : PfmIssueCounter<BWPort2, ["uops_executed_port:port_2"]>;
-def BWPort3Counter : PfmIssueCounter<BWPort3, ["uops_executed_port:port_3"]>;
-def BWPort4Counter : PfmIssueCounter<BWPort4, ["uops_executed_port:port_4"]>;
-def BWPort5Counter : PfmIssueCounter<BWPort5, ["uops_executed_port:port_5"]>;
-def BWPort6Counter : PfmIssueCounter<BWPort6, ["uops_executed_port:port_6"]>;
-def BWPort7Counter : PfmIssueCounter<BWPort7, ["uops_executed_port:port_7"]>;
-}
-
-let SchedModel = SkylakeClientModel in {
-def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def SKLPort0Counter : PfmIssueCounter<SKLPort0, ["uops_dispatched_port:port_0"]>;
-def SKLPort1Counter : PfmIssueCounter<SKLPort1, ["uops_dispatched_port:port_1"]>;
-def SKLPort2Counter : PfmIssueCounter<SKLPort2, ["uops_dispatched_port:port_2"]>;
-def SKLPort3Counter : PfmIssueCounter<SKLPort3, ["uops_dispatched_port:port_3"]>;
-def SKLPort4Counter : PfmIssueCounter<SKLPort4, ["uops_dispatched_port:port_4"]>;
-def SKLPort5Counter : PfmIssueCounter<SKLPort5, ["uops_dispatched_port:port_5"]>;
-def SKLPort6Counter : PfmIssueCounter<SKLPort6, ["uops_dispatched_port:port_6"]>;
-def SKLPort7Counter : PfmIssueCounter<SKLPort7, ["uops_dispatched_port:port_7"]>;
-}
-
-let SchedModel = SkylakeServerModel in {
-def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def SKXPort0Counter : PfmIssueCounter<SKXPort0, ["uops_dispatched_port:port_0"]>;
-def SKXPort1Counter : PfmIssueCounter<SKXPort1, ["uops_dispatched_port:port_1"]>;
-def SKXPort2Counter : PfmIssueCounter<SKXPort2, ["uops_dispatched_port:port_2"]>;
-def SKXPort3Counter : PfmIssueCounter<SKXPort3, ["uops_dispatched_port:port_3"]>;
-def SKXPort4Counter : PfmIssueCounter<SKXPort4, ["uops_dispatched_port:port_4"]>;
-def SKXPort5Counter : PfmIssueCounter<SKXPort5, ["uops_dispatched_port:port_5"]>;
-def SKXPort6Counter : PfmIssueCounter<SKXPort6, ["uops_dispatched_port:port_6"]>;
-def SKXPort7Counter : PfmIssueCounter<SKXPort7, ["uops_dispatched_port:port_7"]>;
-}
-
-let SchedModel = BtVer2Model in {
-def JCycleCounter : PfmCycleCounter<"cpu_clk_unhalted">;
-def JFPU0Counter : PfmIssueCounter<JFPU0, ["dispatched_fpu:pipe0"]>;
-def JFPU1Counter : PfmIssueCounter<JFPU1, ["dispatched_fpu:pipe1"]>;
+def UnhaltedCoreCyclesPfmCounter : PfmCounter<"unhalted_core_cycles">;
+def UopsIssuedPfmCounter : PfmCounter<"uops_issued:any">;
+
+// No default counters on X86.
+def DefaultPfmCounters : ProcPfmCounters {}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
+
+// Intel X86 Counters.
+def PentiumPfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"uops_retired">;
+}
+def : PfmCountersBinding<"pentiumpro", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium2", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium3", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium3m", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium-m", PentiumPfmCounters>;
+
+def CorePfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:any">;
+}
+def : PfmCountersBinding<"yonah", CorePfmCounters>;
+def : PfmCountersBinding<"prescott", CorePfmCounters>;
+def : PfmCountersBinding<"core2", CorePfmCounters>;
+def : PfmCountersBinding<"penryn", CorePfmCounters>;
+def : PfmCountersBinding<"nehalem", CorePfmCounters>;
+def : PfmCountersBinding<"corei7", CorePfmCounters>;
+def : PfmCountersBinding<"westmere", CorePfmCounters>;
+
+def AtomPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:any">;
+}
+def : PfmCountersBinding<"bonnell", AtomPfmCounters>;
+def : PfmCountersBinding<"atom", AtomPfmCounters>;
+
+def SLMPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:any">;
+}
+def : PfmCountersBinding<"silvermont", SLMPfmCounters>;
+def : PfmCountersBinding<"goldmont", SLMPfmCounters>;
+def : PfmCountersBinding<"goldmont-plus", SLMPfmCounters>;
+def : PfmCountersBinding<"tremont", SLMPfmCounters>;
+
+def KnightPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:all">;
+}
+def : PfmCountersBinding<"knl", KnightPfmCounters>;
+def : PfmCountersBinding<"knm", KnightPfmCounters>;
+
+def SandyBridgePfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"SBPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"SBPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"SBPort23", "uops_dispatched_port:port_2 + uops_dispatched_port:port_3">,
+ PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5">
+ ];
+}
+def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>;
+def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>;
+
+def HaswellPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"HWPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"HWPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"HWPort2", "uops_dispatched_port:port_2">,
+ PfmIssueCounter<"HWPort3", "uops_dispatched_port:port_3">,
+ PfmIssueCounter<"HWPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"HWPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"HWPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"HWPort7", "uops_dispatched_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"haswell", HaswellPfmCounters>;
+
+def BroadwellPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"BWPort0", "uops_executed_port:port_0">,
+ PfmIssueCounter<"BWPort1", "uops_executed_port:port_1">,
+ PfmIssueCounter<"BWPort2", "uops_executed_port:port_2">,
+ PfmIssueCounter<"BWPort3", "uops_executed_port:port_3">,
+ PfmIssueCounter<"BWPort4", "uops_executed_port:port_4">,
+ PfmIssueCounter<"BWPort5", "uops_executed_port:port_5">,
+ PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">,
+ PfmIssueCounter<"BWPort7", "uops_executed_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>;
+
+def SkylakeClientPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"SKLPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"SKLPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"SKLPort2", "uops_dispatched_port:port_2">,
+ PfmIssueCounter<"SKLPort3", "uops_dispatched_port:port_3">,
+ PfmIssueCounter<"SKLPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"SKLPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>;
+
+def SkylakeServerPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"SKXPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"SKXPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"SKXPort2", "uops_dispatched_port:port_2">,
+ PfmIssueCounter<"SKXPort3", "uops_dispatched_port:port_3">,
+ PfmIssueCounter<"SKXPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"SKXPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"cannonlake", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"icelake-client", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"icelake-server", SkylakeServerPfmCounters>;
+
+// AMD X86 Counters.
+// Set basic counters for AMD cpus that we know libpfm4 supports.
+def DefaultAMDPfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+}
+def : PfmCountersBinding<"athlon", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-tbird", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-4", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-xp", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-mp", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"k8", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"opteron", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon64", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-fx", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"k8-sse3", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"opteron-sse3", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon64-sse3", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"amdfam10", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"barcelona", DefaultAMDPfmCounters>;
+
+def BdVer2PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"PdFPU0", "dispatched_fpu_ops:ops_pipe0 + dispatched_fpu_ops:ops_dual_pipe0">,
+ PfmIssueCounter<"PdFPU1", "dispatched_fpu_ops:ops_pipe1 + dispatched_fpu_ops:ops_dual_pipe1">,
+ PfmIssueCounter<"PdFPU2", "dispatched_fpu_ops:ops_pipe2 + dispatched_fpu_ops:ops_dual_pipe2">,
+ PfmIssueCounter<"PdFPU3", "dispatched_fpu_ops:ops_pipe3 + dispatched_fpu_ops:ops_dual_pipe3">
+ ];
+}
+def : PfmCountersBinding<"bdver1", BdVer2PfmCounters>;
+def : PfmCountersBinding<"bdver2", BdVer2PfmCounters>;
+
+def BdVer3PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"SrFPU0", "dispatched_fpu_ops:ops_pipe0 + dispatched_fpu_ops:ops_dual_pipe0">,
+ PfmIssueCounter<"SrFPU1", "dispatched_fpu_ops:ops_pipe1 + dispatched_fpu_ops:ops_dual_pipe1">,
+ PfmIssueCounter<"SrFPU2", "dispatched_fpu_ops:ops_pipe2 + dispatched_fpu_ops:ops_dual_pipe2">
+ ];
+}
+def : PfmCountersBinding<"bdver3", BdVer3PfmCounters>;
+def : PfmCountersBinding<"bdver4", BdVer3PfmCounters>;
+
+def BtVer1PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"BtFPU0", "dispatched_fpu:pipe0">,
+ PfmIssueCounter<"BtFPU1", "dispatched_fpu:pipe1">
+ ];
+}
+def : PfmCountersBinding<"btver1", BtVer1PfmCounters>;
+
+def BtVer2PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"JFPU0", "dispatched_fpu:pipe0">,
+ PfmIssueCounter<"JFPU1", "dispatched_fpu:pipe1">
+ ];
+}
+def : PfmCountersBinding<"btver2", BtVer2PfmCounters>;
+
+def ZnVer1PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cycles_not_in_halt">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"ZnFPU0", "fpu_pipe_assignment:total0">,
+ PfmIssueCounter<"ZnFPU1", "fpu_pipe_assignment:total1">,
+ PfmIssueCounter<"ZnFPU2", "fpu_pipe_assignment:total2">,
+ PfmIssueCounter<"ZnFPU3", "fpu_pipe_assignment:total3">,
+ PfmIssueCounter<"ZnDivider", "div_op_count">
+ ];
}
+def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>;
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
index 246d6d5a58d0..355291916ee8 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -194,19 +194,40 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (Opc) {
case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FCONSTANT:
// Instruction having only floating-point operands (all scalars in VECRReg)
getInstrPartialMappingIdxs(MI, MRI, /* isFP */ true, OpRegBankIdx);
break;
- case TargetOpcode::G_SITOFP: {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_FPTOSI: {
// Some of the floating-point instructions have mixed GPR and FP operands:
// fine-tune the computed mapping.
auto &Op0 = MI.getOperand(0);
auto &Op1 = MI.getOperand(1);
const LLT Ty0 = MRI.getType(Op0.getReg());
const LLT Ty1 = MRI.getType(Op1.getReg());
- OpRegBankIdx[0] = getPartialMappingIdx(Ty0, /* isFP */ true);
- OpRegBankIdx[1] = getPartialMappingIdx(Ty1, /* isFP */ false);
+
+ bool FirstArgIsFP = Opc == TargetOpcode::G_SITOFP;
+ bool SecondArgIsFP = Opc == TargetOpcode::G_FPTOSI;
+ OpRegBankIdx[0] = getPartialMappingIdx(Ty0, /* isFP */ FirstArgIsFP);
+ OpRegBankIdx[1] = getPartialMappingIdx(Ty1, /* isFP */ SecondArgIsFP);
+ break;
+ }
+ case TargetOpcode::G_FCMP: {
+ LLT Ty1 = MRI.getType(MI.getOperand(2).getReg());
+ LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
+ (void)Ty2;
+ assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() &&
+ "Mismatched operand sizes for G_FCMP");
+
+ unsigned Size = Ty1.getSizeInBits();
+ (void)Size;
+ assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP");
+
+ auto FpRegBank = getPartialMappingIdx(Ty1, /* isFP */ true);
+ OpRegBankIdx = {PMI_GPR8,
+ /* Predicate */ PMI_None, FpRegBank, FpRegBank};
break;
}
case TargetOpcode::G_TRUNC:
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index ee9e7891f9f6..aa20273f89ab 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -436,11 +436,12 @@ def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>;
def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>;
def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>;
-def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESP)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
- R8, R9, R11, RIP)>;
+ R8, R9, R11, RIP, RSP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
- R8, R9, R10, R11, RIP)>;
+ R8, R9, R10, R11,
+ RIP, RSP)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
@@ -499,6 +500,16 @@ def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;
+// Classes to support the 64-bit assembler constraint tied to a fixed
+// register in 32-bit mode. The second register is always the next in
+// the list. Wrap around causes an error.
+def GR32_DC : RegisterClass<"X86", [i32], 32, (add EDX, ECX)>;
+def GR32_CB : RegisterClass<"X86", [i32], 32, (add ECX, EBX)>;
+def GR32_BSI : RegisterClass<"X86", [i32], 32, (add EBX, ESI)>;
+def GR32_SIDI : RegisterClass<"X86", [i32], 32, (add ESI, EDI)>;
+def GR32_DIBP : RegisterClass<"X86", [i32], 32, (add EDI, EBP)>;
+def GR32_BPSP : RegisterClass<"X86", [i32], 32, (add EBP, ESP)>;
+
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
@@ -511,10 +522,16 @@ def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
+
def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
+// st(7) may not be allocatable.
+def RFP80_7 : RegisterClass<"X86",[f80], 32, (add FP7)> {
+ let isAllocatable = 0;
+}
+
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
diff --git a/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp
index 250deb3523b4..08994cccb21e 100644
--- a/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp
@@ -74,7 +74,7 @@ private:
void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
- void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
+ void populateThunk(MachineFunction &MF, unsigned Reg);
};
} // end anonymous namespace
@@ -115,7 +115,9 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
// FIXME: It's a little silly to look at every function just to enumerate
// the subtargets, but eventually we'll want to look at them for indirect
// calls, so maybe this is OK.
- if (!STI->useRetpoline() || STI->useRetpolineExternalThunk())
+ if ((!STI->useRetpolineIndirectCalls() &&
+ !STI->useRetpolineIndirectBranches()) ||
+ STI->useRetpolineExternalThunk())
return false;
// Otherwise, we need to insert the thunk.
@@ -234,25 +236,33 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
}
void X86RetpolineThunks::populateThunk(MachineFunction &MF,
- Optional<unsigned> Reg) {
+ unsigned Reg) {
// Set MF properties. We never use vregs...
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+ // generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
+ while (MF.size() > 1)
+ MF.erase(std::next(MF.begin()));
MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+ MCSymbol *TargetSym = MF.getContext().createTempSymbol();
MF.push_back(CaptureSpec);
MF.push_back(CallTarget);
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
- BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addMBB(CallTarget);
- Entry->addSuccessor(CallTarget);
+ Entry->addLiveIn(Reg);
+ BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
+
+ // The MIR verifier thinks that the CALL in the entry block will fall through
+ // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
+ // the successor, but the MIR verifier doesn't know how to cope with that.
Entry->addSuccessor(CaptureSpec);
- CallTarget->setHasAddressTaken();
// In the capture loop for speculation, we want to stop the processor from
// speculating as fast as possible. On Intel processors, the PAUSE instruction
@@ -268,7 +278,10 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
CaptureSpec->setHasAddressTaken();
CaptureSpec->addSuccessor(CaptureSpec);
+ CallTarget->addLiveIn(Reg);
+ CallTarget->setHasAddressTaken();
CallTarget->setAlignment(4);
- insertRegReturnAddrClobber(*CallTarget, *Reg);
+ insertRegReturnAddrClobber(*CallTarget, Reg);
+ CallTarget->back().setPreInstrSymbol(MF, TargetSym);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
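For orientation, the thunk that populateThunk now emits has roughly the following shape on a 64-bit target. This is pseudo-assembly written as comments; the label names are illustrative, and only the overall structure is implied by the code above:

// __llvm_retpoline_r11:          ; entry block
//   callq  .Ltmp_target          ; pushes a return address, jumps to .Ltmp_target
// .Lcapture:                     ; CaptureSpec block, target of any mispredicted return
//   pause
//   lfence
//   jmp    .Lcapture             ; pin speculation in this loop
// .Ltmp_target:                  ; CallTarget block, carries TargetSym
//   movq   %r11, (%rsp)          ; overwrite the pushed return address with the real target
//   retq                         ; "return" to the intended indirect-call target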
diff --git a/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td b/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
index 6334d9e89a60..971a50196e45 100755
--- a/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -72,10 +72,16 @@ def BWDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def BWFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/5/6 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/5/6 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 6>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -108,22 +114,47 @@ def : WriteRes<WriteRMW, [BWPort237,BWPort4]>;
// Arithmetic.
defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
defm : BWWriteResPair<WriteADC, [BWPort06], 1>; // Integer ALU + flags op.
-defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication.
-defm : BWWriteResPair<WriteIMul64, [BWPort1], 3>; // Integer 64-bit multiplication.
-defm : BWWriteResPair<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
+// Integer multiplication.
+defm : BWWriteResPair<WriteIMul8, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul16, [BWPort1,BWPort06,BWPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [BWPort1,BWPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>;
+defm : BWWriteResPair<WriteIMul16Reg, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
+defm : BWWriteResPair<WriteIMul32Imm, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul32Reg, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
+// TODO: Why isn't the BWDivider used consistently?
+defm : X86WriteRes<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10], 1>;
+defm : X86WriteRes<WriteDiv16, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+
+defm : X86WriteRes<WriteIDiv8, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv32, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv64, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+
+defm : X86WriteRes<WriteCMPXCHG,[BWPort06, BWPort0156], 5, [2, 3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[BWPort23, BWPort06, BWPort0156, BWPort237, BWPort4], 8, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>;
+defm : X86WriteRes<WriteXCHG, [BWPort0156], 2, [3], 3>;
defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
@@ -136,8 +167,14 @@ def : WriteRes<WriteSETCCStore, [BWPort06,BWPort4,BWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [BWPort06]>;
-def : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs
+
+defm : X86WriteRes<WriteLAHFSAHF, [BWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [BWPort06], 1, [1], 1>; // Bit Test instrs
+defm : X86WriteRes<WriteBitTestImmLd, [BWPort06,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [BWPort0156,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [BWPort06], 1, [1], 1>; // Bit Test + Set instrs
+defm : X86WriteRes<WriteBitTestSetImmLd, [BWPort06,BWPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [BWPort0156,BWPort23], 5, [1,1], 2>;
// Bit counts.
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
@@ -147,7 +184,10 @@ defm : BWWriteResPair<WriteTZCNT, [BWPort1], 3>;
defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>;
// Integer shifts and rotates.
-defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
+defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
+defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
+defm : BWWriteResPair<WriteRotate, [BWPort06], 2, [2], 2>;
+defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
@@ -155,9 +195,10 @@ defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>;
defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
-defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
+defm : BWWriteResPair<WriteBLS, [BWPort15], 1>;
+defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
@@ -582,7 +623,7 @@ def BWWriteResGroup3 : SchedWriteRes<[BWPort5]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[BWWriteResGroup3], (instrs MMX_MOVQ2DQrr)>;
def BWWriteResGroup4 : SchedWriteRes<[BWPort6]> {
let Latency = 1;
@@ -610,10 +651,7 @@ def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup7], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[BWWriteResGroup7], (instregex "ANDN(32|64)rr")>;
def BWWriteResGroup8 : SchedWriteRes<[BWPort015]> {
let Latency = 1;
@@ -627,19 +665,19 @@ def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup9], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+def: InstRW<[BWWriteResGroup9], (instrs SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> {
let Latency = 1;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
- "ST_FP(32|64|80)m")>;
+def: InstRW<[BWWriteResGroup10], (instrs FBSTPm)>;
+def: InstRW<[BWWriteResGroup10], (instregex "ST_FP(32|64|80)m")>;
def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
let Latency = 2;
@@ -648,16 +686,6 @@ def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
}
def: InstRW<[BWWriteResGroup12], (instrs FDECSTP)>;
-def BWWriteResGroup13 : SchedWriteRes<[BWPort06]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[BWWriteResGroup13], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri")>;
-
def BWWriteResGroup14 : SchedWriteRes<[BWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -688,7 +716,7 @@ def BWWriteResGroup17 : SchedWriteRes<[BWPort01,BWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup17], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[BWWriteResGroup17], (instrs MMX_MOVDQ2Qrr)>;
def BWWriteResGroup18 : SchedWriteRes<[BWPort237,BWPort0156]> {
let Latency = 2;
@@ -702,11 +730,10 @@ def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup20], (instrs CWD)>;
-def: InstRW<[BWWriteResGroup20], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[BWWriteResGroup20], (instregex "ADC8i8",
- "ADC8ri",
- "SBB8i8",
+def: InstRW<[BWWriteResGroup20], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8)>;
+def: InstRW<[BWWriteResGroup20], (instregex "ADC8ri",
"SBB8ri",
"SET(A|BE)r")>;
@@ -729,53 +756,35 @@ def BWWriteResGroup25 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup25], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[BWWriteResGroup25], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr")>;
def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr",
- "PDEP(32|64)rr",
- "PEXT(32|64)rr",
+def: InstRW<[BWWriteResGroup27], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr",
"(V?)CVTDQ2PS(Y?)rr")>;
-def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup27_16], (instrs IMUL16rri, IMUL16rri8)>;
-
def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBrr",
- "VPBROADCASTWrr")>;
-
-def BWWriteResGroup30 : SchedWriteRes<[BWPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[BWWriteResGroup30], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
+def: InstRW<[BWWriteResGroup28], (instrs VPBROADCASTBrr,
+ VPBROADCASTWrr)>;
def BWWriteResGroup33 : SchedWriteRes<[BWPort5,BWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup33], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[BWWriteResGroup33], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def BWWriteResGroup34 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 3;
@@ -789,21 +798,8 @@ def BWWriteResGroup35 : SchedWriteRes<[BWPort06,BWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[BWWriteResGroup35], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
-
-def BWWriteResGroup36 : SchedWriteRes<[BWPort06,BWPort0156]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[BWWriteResGroup36], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
+def: InstRW<[BWWriteResGroup35], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def BWWriteResGroup37 : SchedWriteRes<[BWPort4,BWPort6,BWPort237,BWPort0156]> {
let Latency = 3;
@@ -835,7 +831,7 @@ def BWWriteResGroup40 : SchedWriteRes<[BWPort0,BWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup40], (instregex "VCVTPS2PDYrr")>;
+def: InstRW<[BWWriteResGroup40], (instrs VCVTPS2PDYrr)>;
def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> {
let Latency = 4;
@@ -849,9 +845,8 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup42], (instrs IMUL64r, MUL64r, MULX64rr)>;
-def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTPI2PDirr",
- "MMX_CVT(T?)PD2PIirr",
+def: InstRW<[BWWriteResGroup42], (instrs MMX_CVTPI2PDirr)>;
+def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIirr",
"MMX_CVT(T?)PS2PIirr",
"(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
@@ -861,13 +856,6 @@ def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTPI2PDirr",
"(V?)CVTSI2SSrr",
"(V?)CVT(T?)PD2DQrr")>;
-def BWWriteResGroup42_16 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[BWWriteResGroup42_16], (instrs IMUL16r, MUL16r)>;
-
def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -890,10 +878,10 @@ def BWWriteResGroup45 : SchedWriteRes<[BWPort0156]> {
}
def: InstRW<[BWWriteResGroup45], (instrs FNCLEX)>;
-def BWWriteResGroup46 : SchedWriteRes<[BWPort015,BWPort0156]> {
- let Latency = 4;
+def BWWriteResGroup46 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[BWWriteResGroup46], (instrs VZEROUPPER)>;
@@ -910,17 +898,14 @@ def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
- "VBROADCASTSSrm",
- "(V?)MOVDDUPrm",
- "(V?)MOVSHDUPrm",
- "(V?)MOVSLDUPrm",
- "VPBROADCASTDrm",
- "VPBROADCASTQrm")>;
+def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)")>;
+def: InstRW<[BWWriteResGroup49], (instrs VBROADCASTSSrm,
+ VMOVDDUPrm, MOVDDUPrm,
+ VMOVSHDUPrm, MOVSHDUPrm,
+ VMOVSLDUPrm, MOVSLDUPrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm)>;
def BWWriteResGroup50 : SchedWriteRes<[BWPort1,BWPort5]> {
let Latency = 5;
@@ -936,13 +921,6 @@ def BWWriteResGroup51 : SchedWriteRes<[BWPort1,BWPort6,BWPort06]> {
}
def: InstRW<[BWWriteResGroup51], (instregex "STR(16|32|64)r")>;
-def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -957,13 +935,6 @@ def BWWriteResGroup55 : SchedWriteRes<[BWPort06,BWPort0156]> {
}
def: InstRW<[BWWriteResGroup55], (instrs XSETBV)>;
-def BWWriteResGroup56 : SchedWriteRes<[BWPort06,BWPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[BWWriteResGroup56], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def BWWriteResGroup57 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
@@ -976,50 +947,44 @@ def BWWriteResGroup58 : SchedWriteRes<[BWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup58], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
- "VPBROADCASTDYrm",
- "VPBROADCASTQYrm")>;
+def: InstRW<[BWWriteResGroup58], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[BWWriteResGroup58], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup59], (instregex "(V?)CVTPS2PDrm",
- "(V?)CVTSS2SDrm",
- "VPSLLVQrm",
- "VPSRLVQrm")>;
+def: InstRW<[BWWriteResGroup59], (instrs CVTPS2PDrm, VCVTPS2PDrm,
+ CVTSS2SDrm, VCVTSS2SDrm,
+ VPSLLVQrm,
+ VPSRLVQrm)>;
def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup60], (instregex "VCVTDQ2PDYrr",
- "VCVTPD2PSYrr",
- "VCVT(T?)PD2DQYrr")>;
+def: InstRW<[BWWriteResGroup60], (instrs VCVTDQ2PDYrr,
+ VCVTPD2PSYrr,
+ VCVTPD2DQYrr,
+ VCVTTPD2DQYrr)>;
def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup62], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def BWWriteResGroup63 : SchedWriteRes<[BWPort23,BWPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup63], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[BWWriteResGroup62], (instrs FARJMP64)>;
+def: InstRW<[BWWriteResGroup62], (instregex "JMP(16|32|64)m")>;
def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> {
let Latency = 6;
@@ -1027,9 +992,6 @@ def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup64], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def BWWriteResGroup65 : SchedWriteRes<[BWPort23,BWPort015]> {
@@ -1037,9 +999,9 @@ def BWWriteResGroup65 : SchedWriteRes<[BWPort23,BWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup65], (instregex "VINSERTF128rm",
- "VINSERTI128rm",
- "VPBLENDDrmi")>;
+def: InstRW<[BWWriteResGroup65], (instrs VINSERTF128rm,
+ VINSERTI128rm,
+ VPBLENDDrmi)>;
def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 6;
@@ -1061,15 +1023,9 @@ def BWWriteResGroup69 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[BWWriteResGroup69], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def BWWriteResGroup70 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 6;
@@ -1091,8 +1047,8 @@ def BWWriteResGroup73 : SchedWriteRes<[BWPort0,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup73], (instregex "VPSLLVQYrm",
- "VPSRLVQYrm")>;
+def: InstRW<[BWWriteResGroup73], (instrs VPSLLVQYrm,
+ VPSRLVQYrm)>;
def BWWriteResGroup74 : SchedWriteRes<[BWPort1,BWPort23]> {
let Latency = 7;
@@ -1106,16 +1062,16 @@ def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup77], (instregex "VPBLENDDYrmi")>;
+def: InstRW<[BWWriteResGroup77], (instrs VPBLENDDYrmi)>;
def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 7;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[BWWriteResGroup79], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 7;
@@ -1144,10 +1100,8 @@ def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def BWWriteResGroup88 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 7;
@@ -1161,8 +1115,8 @@ def BWWriteResGroup89 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[BWWriteResGroup89], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[BWWriteResGroup89], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[BWWriteResGroup89], (instrs FARCALL64)>;
def BWWriteResGroup90 : SchedWriteRes<[BWPort6,BWPort06,BWPort15,BWPort0156]> {
let Latency = 7;
@@ -1176,54 +1130,31 @@ def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm",
- "PDEP(32|64)rm",
- "PEXT(32|64)rm",
- "(V?)CVTDQ2PSrm")>;
-
-def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup91_16], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort06, BWPort0156, BWPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[BWWriteResGroup91_16_2], (instrs IMUL16m, MUL16m)>;
+def: InstRW<[BWWriteResGroup91], (instrs MMX_CVTPI2PSirm,
+ CVTDQ2PSrm,
+ VCVTDQ2PSrm)>;
+def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>;
def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup92], (instregex "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXBWYrm",
- "VPMOVSXDQYrm",
- "VPMOVSXWDYrm",
- "VPMOVSXWQYrm",
- "VPMOVZXWDYrm")>;
+def: InstRW<[BWWriteResGroup92], (instrs VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVSXWQYrm,
+ VPMOVZXWDYrm)>;
def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[BWWriteResGroup97], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
-
-def BWWriteResGroup98 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[BWWriteResGroup98], (instregex "ROR(8|16|32|64)mCL")>;
+def: InstRW<[BWWriteResGroup97], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def BWWriteResGroup99 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 8;
@@ -1238,8 +1169,8 @@ def BWWriteResGroup100 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPo
let ResourceCycles = [1,1,1,2,1];
}
def : SchedAlias<WriteADCRMW, BWWriteResGroup100>;
-def: InstRW<[BWWriteResGroup100], (instregex "CMPXCHG(8|16|32|64)rm",
- "ROL(8|16|32|64)mCL",
+def: InstRW<[BWWriteResGroup100], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1250,9 +1181,9 @@ def BWWriteResGroup101 : SchedWriteRes<[BWPort1,BWPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
- "ILD_F(16|32|64)m",
- "VCVTPS2DQYrm",
- "VCVTTPS2DQYrm")>;
+ "ILD_F(16|32|64)m")>;
+def: InstRW<[BWWriteResGroup101], (instrs VCVTPS2DQYrm,
+ VCVTTPS2DQYrm)>;
def BWWriteResGroup105 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
let Latency = 9;
@@ -1270,18 +1201,18 @@ def BWWriteResGroup106 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup106], (instregex "VCVTPS2PDYrm")>;
+def: InstRW<[BWWriteResGroup106], (instrs VCVTPS2PDYrm)>;
def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup107], (instrs IMUL64m, MUL64m, MULX64rm)>;
-def: InstRW<[BWWriteResGroup107], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVTPI2PDirm",
- "MMX_CVT(T?)PD2PIirm",
+def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPI2PDirm)>;
+def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIirm",
"(V?)CVTDQ2PDrm",
"(V?)CVTSD2SSrm")>;
@@ -1298,7 +1229,7 @@ def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
let NumMicroOps = 5;
let ResourceCycles = [1,1,3];
}
-def: InstRW<[BWWriteResGroup112], (instregex "RDRAND(16|32|64)r")>;
+def: InstRW<[BWWriteResGroup112], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
def BWWriteResGroup113 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> {
let Latency = 9;
@@ -1329,13 +1260,6 @@ def BWWriteResGroup120 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
}
def: InstRW<[BWWriteResGroup120], (instregex "CVTTSS2SI64rm")>;
-def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>;
-
def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 11;
let NumMicroOps = 1;
@@ -1348,15 +1272,15 @@ def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m",
- "VPCMPGTQYrm")>;
+def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m")>;
+def: InstRW<[BWWriteResGroup123], (instrs VPCMPGTQYrm)>;
def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[BWWriteResGroup128], (instrs VCVTDQ2PDYrm)>;
def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
let Latency = 11;
@@ -1371,7 +1295,7 @@ def BWWriteResGroup132 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> {
let NumMicroOps = 9;
let ResourceCycles = [1,4,1,3];
}
-def: InstRW<[BWWriteResGroup132], (instregex "RCL8rCL")>;
+def: InstRW<[BWWriteResGroup132], (instrs RCL8rCL)>;
def BWWriteResGroup133 : SchedWriteRes<[BWPort06,BWPort0156]> {
let Latency = 11;
@@ -1414,7 +1338,7 @@ def BWWriteResGroup145 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> {
let NumMicroOps = 10;
let ResourceCycles = [2,3,1,4];
}
-def: InstRW<[BWWriteResGroup145], (instregex "RCR8rCL")>;
+def: InstRW<[BWWriteResGroup145], (instrs RCR8rCL)>;
def BWWriteResGroup146 : SchedWriteRes<[BWPort0,BWPort1,BWPort6,BWPort0156]> {
let Latency = 14;
@@ -1451,10 +1375,10 @@ def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPo
}
def: InstRW<[BWWriteResGroup153], (instrs CMPXCHG8B)>;
-def BWWriteResGroup154 : SchedWriteRes<[BWPort5]> {
- let Latency = 16;
- let NumMicroOps = 16;
- let ResourceCycles = [16];
+def BWWriteResGroup154 : SchedWriteRes<[BWPort5,BWPort6]> {
+ let Latency = 8;
+ let NumMicroOps = 20;
+ let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup154], (instrs VZEROALL)>;
@@ -1513,7 +1437,7 @@ def BWWriteResGroup172 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let NumMicroOps = 18;
let ResourceCycles = [1,1,16];
}
-def: InstRW<[BWWriteResGroup172], (instregex "POPF64")>;
+def: InstRW<[BWWriteResGroup172], (instrs POPF64)>;
def BWWriteResGroup176 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let Latency = 23;
@@ -1608,13 +1532,6 @@ def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPor
def: InstRW<[BWWriteResGroup186], (instrs XSAVE)>;
def: InstRW<[BWWriteResGroup186], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
-def BWWriteResGroup190 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> {
- let Latency = 34;
- let NumMicroOps = 8;
- let ResourceCycles = [2,2,2,1,1];
-}
-def: InstRW<[BWWriteResGroup190], (instregex "DIV(8|16|32|64)m")>;
-
def BWWriteResGroup191 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort06,BWPort0156]> {
let Latency = 34;
let NumMicroOps = 23;
@@ -1623,13 +1540,6 @@ def BWWriteResGroup191 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort06,BWPort
def: InstRW<[BWWriteResGroup191], (instregex "IN(8|16|32)ri",
"IN(8|16|32)rr")>;
-def BWWriteResGroup193 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> {
- let Latency = 35;
- let NumMicroOps = 8;
- let ResourceCycles = [2,2,2,1,1];
-}
-def: InstRW<[BWWriteResGroup193], (instregex "IDIV(8|16|32|64)m")>;
-
def BWWriteResGroup194 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
@@ -1673,13 +1583,6 @@ def BWWriteResGroup200 : SchedWriteRes<[BWPort5,BWPort01,BWPort0156]> {
}
def: InstRW<[BWWriteResGroup200], (instrs FNINIT)>;
-def BWWriteResGroup201 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156]> {
- let Latency = 80;
- let NumMicroOps = 32;
- let ResourceCycles = [7,7,3,3,1,11];
-}
-def: InstRW<[BWWriteResGroup201], (instregex "DIV(16|32|64)r")>;
-
def BWWriteResGroup202 : SchedWriteRes<[BWPort0,BWPort1,BWPort4,BWPort5,BWPort6,BWPort237,BWPort06,BWPort0156]> {
let Latency = 115;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
index 876c3e4162cf..06a32fb0b1cd 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -77,10 +77,16 @@ def HWDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def HWFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
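// Illustrative sketch only (not part of this patch; the "Hypothetical" names are
// placeholders): a write pair models the register form and the folded-load form
// of the same operation. The folded form adds the load latency on top of the op
// latency, while the ReadAdvance entries above let the remaining register
// sources arrive late, so a value feeding the register source of the memory
// form roughly pays only the op latency, not op + load.
def HypotheticalVecOpX   : SchedWriteRes<[HWPort01]> {
  let Latency = 3;                // op latency of the register form
}
def HypotheticalVecOpXLd : SchedWriteRes<[HWPort01,HWPort23]> {
  let Latency = 9;                // 3 cycle op + 6 cycle XMM load
  let NumMicroOps = 2;            // micro-fused load + op
  let ResourceCycles = [1,1];
}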
@@ -121,16 +127,32 @@ def : WriteRes<WriteZero, []>;
// Arithmetic.
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
defm : HWWriteResPair<WriteADC, [HWPort06, HWPort0156], 2, [1,1], 2>;
-defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
-defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>;
+
+// Integer multiplication.
+defm : HWWriteResPair<WriteIMul8, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul16, [HWPort1,HWPort06,HWPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [HWPort1,HWPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>;
+defm : HWWriteResPair<WriteIMul16Reg, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
+defm : HWWriteResPair<WriteIMul32Imm, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul32Reg, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>;
+defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>;
-
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+defm : X86WriteRes<WriteCMPXCHG,[HWPort06, HWPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[HWPort23,HWPort06,HWPort0156,HWPort237,HWPort4], 9, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [HWPort0156], 2, [3], 3>;
// Integer shifts and rotates.
-defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
+defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
+defm : HWWriteResPair<WriteShiftCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
+defm : HWWriteResPair<WriteRotate, [HWPort06], 2, [2], 2>;
+defm : HWWriteResPair<WriteRotateCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [HWPort1], 3, [1], 1>;
@@ -149,8 +171,14 @@ def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [HWPort06]>;
-def : WriteRes<WriteBitTest,[HWPort06]>;
+
+defm : X86WriteRes<WriteLAHFSAHF, [HWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [HWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [HWPort06,HWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [], 1, [], 10>;
+defm : X86WriteRes<WriteBitTestSet, [HWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [HWPort06,HWPort23], 6, [1,1], 3>;
+//defm : X86WriteRes<WriteBitTestSetRegLd, [], 1, [], 11>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -164,18 +192,29 @@ defm : HWWriteResPair<WriteLZCNT, [HWPort1], 3>;
defm : HWWriteResPair<WriteTZCNT, [HWPort1], 3>;
defm : HWWriteResPair<WritePOPCNT, [HWPort1], 3>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : HWWriteResPair<WriteBEXTR, [HWPort06,HWPort15], 2, [1,1], 2>;
-defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>;
-
-defm : HWWriteResPair<WriteDiv8, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteDiv16, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteDiv32, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteDiv64, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv8, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv16, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv32, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv64, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteBLS, [HWPort15], 1>;
+defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>;
+
+// TODO: Why isn't the HWDivider used?
+defm : X86WriteRes<WriteDiv8, [HWPort0,HWPort1,HWPort5,HWPort6], 22, [], 9>;
+defm : X86WriteRes<WriteDiv16, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156], 98, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156], 98, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156], 98, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv8Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv16Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [HWPort0,HWPort1,HWPort5,HWPort6], 23, [], 9>;
+defm : X86WriteRes<WriteIDiv16, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156], 112, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156], 112, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156], 112, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteIDiv16Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteIDiv32Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteIDiv64Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [HWPort01], 1, [1], 1>;
@@ -614,35 +653,12 @@ def : InstRW<[HWWritePopA], (instregex "POPA(16|32)")>;
//-- Arithmetic instructions --//
-// DIV.
-// r8.
-def HWWriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
- let Latency = 22;
- let NumMicroOps = 9;
-}
-def : InstRW<[HWWriteDiv8], (instregex "DIV8r")>;
-
-// IDIV.
-// r8.
-def HWWriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
- let Latency = 23;
- let NumMicroOps = 9;
-}
-def : InstRW<[HWWriteIDiv8], (instregex "IDIV8r")>;
-
-// BT.
-// m,r.
-def HWWriteBTmr : SchedWriteRes<[]> {
- let NumMicroOps = 10;
-}
-def : InstRW<[HWWriteBTmr], (instregex "BT(16|32|64)mr")>;
-
// BTR BTS BTC.
// m,r.
def HWWriteBTRSCmr : SchedWriteRes<[]> {
let NumMicroOps = 11;
}
-def : InstRW<[HWWriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>;
+def : SchedAlias<WriteBitTestSetRegRMW, HWWriteBTRSCmr>;
//-- Control transfer instructions --//
@@ -704,14 +720,14 @@ def HWWriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> {
let NumMicroOps = 17;
let ResourceCycles = [1, 16];
}
-def : InstRW<[HWWriteRDRAND], (instregex "RDRAND(16|32|64)r")>;
+def : InstRW<[HWWriteRDRAND], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
//=== Floating Point x87 Instructions ===//
//-- Move instructions --//
// FLD.
// m80.
-def : InstRW<[HWWriteP01], (instregex "LD_Frr")>;
+def : InstRW<[HWWriteP01], (instrs LD_Frr)>;
// FBLD.
// m80.
@@ -719,7 +735,7 @@ def HWWriteFBLD : SchedWriteRes<[]> {
let Latency = 47;
let NumMicroOps = 43;
}
-def : InstRW<[HWWriteFBLD], (instregex "FBLDm")>;
+def : InstRW<[HWWriteFBLD], (instrs FBLDm)>;
// FST(P).
// r.
@@ -732,13 +748,13 @@ def : InstRW<[HWWriteP01], (instregex "FFREE")>;
def HWWriteFNSAVE : SchedWriteRes<[]> {
let NumMicroOps = 147;
}
-def : InstRW<[HWWriteFNSAVE], (instregex "FSAVEm")>;
+def : InstRW<[HWWriteFNSAVE], (instrs FSAVEm)>;
// FRSTOR.
def HWWriteFRSTOR : SchedWriteRes<[]> {
let NumMicroOps = 90;
}
-def : InstRW<[HWWriteFRSTOR], (instregex "FRSTORm")>;
+def : InstRW<[HWWriteFRSTOR], (instrs FRSTORm)>;
//-- Arithmetic instructions --//
@@ -812,8 +828,8 @@ def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm",
- "(V?)MOVSHDUPrm",
+def: InstRW<[HWWriteResGroup0], (instrs VBROADCASTSSrm)>;
+def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm",
"(V?)MOVSLDUPrm",
"VPBROADCAST(D|Q)rm")>;
@@ -822,14 +838,14 @@ def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
+def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm)>;
def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
"VPBROADCAST(D|Q)Yrm")>;
def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> {
@@ -837,11 +853,8 @@ def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
+def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
"(V?)MOVDDUPrm")>;
def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
@@ -849,9 +862,8 @@ def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm",
- "ST_FP(32|64|80)m",
- "VMPTRSTm")>;
+def: InstRW<[HWWriteResGroup1], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[HWWriteResGroup1], (instregex "ST_FP(32|64|80)m")>;
def HWWriteResGroup2 : SchedWriteRes<[HWPort0]> {
let Latency = 1;
@@ -874,7 +886,7 @@ def HWWriteResGroup4 : SchedWriteRes<[HWPort5]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[HWWriteResGroup4], (instrs MMX_MOVQ2DQrr)>;
def HWWriteResGroup5 : SchedWriteRes<[HWPort6]> {
let Latency = 1;
@@ -902,10 +914,7 @@ def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup8], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[HWWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def HWWriteResGroup9 : SchedWriteRes<[HWPort015]> {
let Latency = 1;
@@ -920,12 +929,12 @@ def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> {
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup10], (instrs CBW, CWDE, CDQE,
- CMC, STC)>;
-def: InstRW<[HWWriteResGroup10], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def HWWriteResGroup11 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 6;
@@ -939,40 +948,23 @@ def HWWriteResGroup11_1 : SchedWriteRes<[HWPort0,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11_1], (instregex "(V?)CVTSS2SDrm",
- "VPSLLVQrm",
- "VPSRLVQrm")>;
+def: InstRW<[HWWriteResGroup11_1], (instrs VPSLLVQrm, VPSRLVQrm)>;
+def: InstRW<[HWWriteResGroup11_1], (instregex "(V?)CVTSS2SDrm")>;
def HWWriteResGroup11_2 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLVQYrm",
- "VPSRLVQYrm")>;
+def: InstRW<[HWWriteResGroup11_2], (instrs VPSLLVQYrm, VPSRLVQYrm)>;
def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup12], (instregex "MMX_CVTPI2PSirm",
- "PDEP(32|64)rm",
- "PEXT(32|64)rm")>;
-
-def HWWriteResGroup12_1 : SchedWriteRes<[HWPort1,HWPort0156,HWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup12_1], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def HWWriteResGroup12_2 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[HWWriteResGroup12_2], (instrs IMUL16m, MUL16m)>;
+def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSirm)>;
+def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>;
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 6;
@@ -991,24 +983,17 @@ def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup13_1], (instregex "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXWQYrm")>;
+def: InstRW<[HWWriteResGroup13_1], (instrs VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup14], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def HWWriteResGroup15 : SchedWriteRes<[HWPort23,HWPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup15], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[HWWriteResGroup14], (instrs FARJMP64)>;
+def: InstRW<[HWWriteResGroup14], (instregex "JMP(16|32|64)m")>;
def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort15]> {
let Latency = 6;
@@ -1016,9 +1001,6 @@ def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup16], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
@@ -1026,16 +1008,16 @@ def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup17], (instregex "VINSERTF128rm",
- "VINSERTI128rm",
- "VPBLENDDrmi")>;
+def: InstRW<[HWWriteResGroup17], (instrs VINSERTF128rm,
+ VINSERTI128rm,
+ VPBLENDDrmi)>;
def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup17_2], (instregex "VPBLENDDYrmi")>;
+def: InstRW<[HWWriteResGroup17_2], (instrs VPBLENDDYrmi)>;
def HWWriteResGroup18 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 6;
@@ -1078,25 +1060,18 @@ def HWWriteResGroup24 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup24], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[HWWriteResGroup24], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[HWWriteResGroup24], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[HWWriteResGroup24], (instregex "PUSH(16|32|64)rmr")>;
def HWWriteResGroup25 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
let Latency = 7;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[HWWriteResGroup25], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[HWWriteResGroup25], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def HWWriteResGroup26 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 7;
@@ -1113,16 +1088,6 @@ def HWWriteResGroup28 : SchedWriteRes<[HWPort01]> {
}
def: InstRW<[HWWriteResGroup28], (instrs FDECSTP)>;
-def HWWriteResGroup29 : SchedWriteRes<[HWPort06]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[HWWriteResGroup29], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri")>;
-
def HWWriteResGroup30 : SchedWriteRes<[HWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -1153,7 +1118,7 @@ def HWWriteResGroup33 : SchedWriteRes<[HWPort01,HWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup33], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[HWWriteResGroup33], (instrs MMX_MOVDQ2Qrr)>;
def HWWriteResGroup35 : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 2;
@@ -1168,9 +1133,9 @@ def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup36_2], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[HWWriteResGroup36_2], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def HWWriteResGroup37 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 7;
@@ -1214,10 +1179,8 @@ def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def HWWriteResGroup47 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 8;
@@ -1231,26 +1194,18 @@ def HWWriteResGroup48 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[HWWriteResGroup48], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[HWWriteResGroup48], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[HWWriteResGroup48], (instrs FARCALL64)>;
def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirr",
- "PDEP(32|64)rr",
- "PEXT(32|64)rr",
+def: InstRW<[HWWriteResGroup50], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr",
"(V?)CVTDQ2PS(Y?)rr")>;
-def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup50_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
let Latency = 3;
let NumMicroOps = 1;
@@ -1272,38 +1227,29 @@ def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
- "ILD_F(16|32|64)m",
- "VCVTDQ2PSYrm",
- "VCVTPS2DQYrm",
- "VCVTTPS2DQYrm")>;
+ "ILD_F(16|32|64)m")>;
+def: InstRW<[HWWriteResGroup52_1], (instrs VCVTDQ2PSYrm,
+ VCVTPS2DQYrm,
+ VCVTTPS2DQYrm)>;
def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup53_1], (instregex "VPMOVSXBWYrm",
- "VPMOVSXDQYrm",
- "VPMOVSXWDYrm",
- "VPMOVZXWDYrm")>;
-
-def HWWriteResGroup54 : SchedWriteRes<[HWPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[HWWriteResGroup54], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
+def: InstRW<[HWWriteResGroup53_1], (instrs VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
def HWWriteResGroup57 : SchedWriteRes<[HWPort5,HWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup57], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[HWWriteResGroup57], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def HWWriteResGroup58 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 3;
@@ -1317,21 +1263,8 @@ def HWWriteResGroup59 : SchedWriteRes<[HWPort06,HWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[HWWriteResGroup59], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
-
-def HWWriteResGroup60 : SchedWriteRes<[HWPort06,HWPort0156]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[HWWriteResGroup60], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
+def: InstRW<[HWWriteResGroup59], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def HWWriteResGroup61 : SchedWriteRes<[HWPort0,HWPort4,HWPort237]> {
let Latency = 4;
@@ -1353,17 +1286,8 @@ def HWWriteResGroup66 : SchedWriteRes<[HWPort23,HWPort237,HWPort06,HWPort0156]>
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[HWWriteResGroup66], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
-
-def HWWriteResGroup67 : SchedWriteRes<[HWPort23,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[HWWriteResGroup67], (instregex "ROR(8|16|32|64)mCL")>;
+def: InstRW<[HWWriteResGroup66], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def HWWriteResGroup68 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 9;
@@ -1377,8 +1301,8 @@ def HWWriteResGroup69 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPor
let NumMicroOps = 6;
let ResourceCycles = [1,1,1,2,1];
}
-def: InstRW<[HWWriteResGroup69], (instregex "CMPXCHG(8|16|32|64)rm",
- "ROL(8|16|32|64)mCL",
+def: InstRW<[HWWriteResGroup69], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1397,7 +1321,7 @@ def HWWriteResGroup71 : SchedWriteRes<[HWPort0,HWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup71], (instregex "VCVTPS2PDYrr")>;
+def: InstRW<[HWWriteResGroup71], (instrs VCVTPS2PDYrr)>;
def HWWriteResGroup72 : SchedWriteRes<[HWPort0,HWPort0156]> {
let Latency = 4;
@@ -1411,30 +1335,18 @@ def HWWriteResGroup73 : SchedWriteRes<[HWPort1,HWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup73], (instregex "MMX_CVTPI2PDirr",
- "MMX_CVT(T?)PD2PIirr",
- "MMX_CVT(T?)PS2PIirr",
- "(V?)CVTDQ2PDrr",
+def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPI2PDirr,
+ MMX_CVTPD2PIirr,
+ MMX_CVTPS2PIirr,
+ MMX_CVTTPD2PIirr,
+ MMX_CVTTPS2PIirr)>;
+def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
"(V?)CVTSD2SSrr",
"(V?)CVTSI(64)?2SDrr",
"(V?)CVTSI2SSrr",
"(V?)CVT(T?)PD2DQrr")>;
-def HWWriteResGroup74 : SchedWriteRes<[HWPort1,HWPort6]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup74], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def HWWriteResGroup74_16 : SchedWriteRes<[HWPort1, HWPort06, HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[HWWriteResGroup74_16], (instrs IMUL16r, MUL16r)>;
-
def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
@@ -1458,32 +1370,29 @@ def HWWriteResGroup77 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup77], (instregex "VCVTPS2PDYrm")>;
+def: InstRW<[HWWriteResGroup77], (instrs VCVTPS2PDYrm)>;
def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 10;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup78], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVT(T?)PD2PIirm",
- "(V?)CVTDQ2PDrm")>;
+def: InstRW<[HWWriteResGroup78], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm,
+ CVTDQ2PDrm,
+ VCVTDQ2PDrm)>;
def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup78_1], (instregex "MMX_CVTPI2PDirm",
- "(V?)CVTSD2SSrm")>;
-
-def HWWriteResGroup79 : SchedWriteRes<[HWPort1,HWPort6,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup79], (instrs IMUL64m, MUL64m, MULX64rm)>;
+def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDirm,
+ CVTSD2SSrm,
+ VCVTSD2SSrm)>;
def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
let Latency = 9;
@@ -1499,10 +1408,10 @@ def HWWriteResGroup81 : SchedWriteRes<[HWPort0156]> {
}
def: InstRW<[HWWriteResGroup81], (instrs FNCLEX)>;
-def HWWriteResGroup82 : SchedWriteRes<[HWPort015,HWPort0156]> {
- let Latency = 4;
+def HWWriteResGroup82 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[HWWriteResGroup82], (instrs VZEROUPPER)>;
@@ -1548,8 +1457,8 @@ def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m",
- "VPCMPGTQYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m")>;
+def: InstRW<[HWWriteResGroup91_3], (instrs VPCMPGTQYrm)>;
def HWWriteResGroup93 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 5;
@@ -1565,13 +1474,6 @@ def HWWriteResGroup94 : SchedWriteRes<[HWPort1,HWPort6,HWPort06]> {
}
def: InstRW<[HWWriteResGroup94], (instregex "STR(16|32|64)r")>;
-def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup95], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
let Latency = 10;
let NumMicroOps = 4;
@@ -1579,13 +1481,6 @@ def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
}
def: InstRW<[HWWriteResGroup97], (instregex "CVTTSS2SI64rm")>;
-def HWWriteResGroup98 : SchedWriteRes<[HWPort1,HWPort23,HWPort06,HWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup98], (instrs IMUL32m, MUL32m, MULX32rm)>;
-
def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -1600,21 +1495,15 @@ def HWWriteResGroup100 : SchedWriteRes<[HWPort06,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup100], (instrs XSETBV)>;
-def HWWriteResGroup101 : SchedWriteRes<[HWPort06,HWPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[HWWriteResGroup101], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup102], (instregex "VCVTDQ2PDYrr",
- "VCVTPD2PSYrr",
- "VCVT(T?)PD2DQYrr")>;
+def: InstRW<[HWWriteResGroup102], (instrs VCVTDQ2PDYrr,
+ VCVTPD2PSYrr,
+ VCVTPD2DQYrr,
+ VCVTTPD2DQYrr)>;
def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 13;
@@ -1628,7 +1517,7 @@ def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup104], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[HWWriteResGroup104], (instrs VCVTDQ2PDYrm)>;
def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> {
let Latency = 6;
@@ -1678,7 +1567,7 @@ def HWWriteResGroup130 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
let NumMicroOps = 9;
let ResourceCycles = [1,4,1,3];
}
-def: InstRW<[HWWriteResGroup130], (instregex "RCL8rCL")>;
+def: InstRW<[HWWriteResGroup130], (instrs RCL8rCL)>;
def HWWriteResGroup131 : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 11;
@@ -1706,14 +1595,14 @@ def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
let NumMicroOps = 10;
let ResourceCycles = [2,3,1,4];
}
-def: InstRW<[HWWriteResGroup142], (instregex "RCR8rCL")>;
+def: InstRW<[HWWriteResGroup142], (instrs RCR8rCL)>;
def HWWriteResGroup143 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 19;
let NumMicroOps = 15;
let ResourceCycles = [1,14];
}
-def: InstRW<[HWWriteResGroup143], (instregex "POPF16")>;
+def: InstRW<[HWWriteResGroup143], (instrs POPF16)>;
def HWWriteResGroup144 : SchedWriteRes<[HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort06,HWPort0156]> {
let Latency = 21;
@@ -1722,10 +1611,10 @@ def HWWriteResGroup144 : SchedWriteRes<[HWPort4,HWPort5,HWPort6,HWPort23,HWPort2
}
def: InstRW<[HWWriteResGroup144], (instrs INSB, INSL, INSW)>;
-def HWWriteResGroup145 : SchedWriteRes<[HWPort5]> {
- let Latency = 16;
- let NumMicroOps = 16;
- let ResourceCycles = [16];
+def HWWriteResGroup145 : SchedWriteRes<[HWPort5, HWPort6]> {
+ let Latency = 8;
+ let NumMicroOps = 20;
+ let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup145], (instrs VZEROALL)>;
@@ -1879,20 +1768,6 @@ def HWWriteResGroup180 : SchedWriteRes<[HWPort5,HWPort01,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup180], (instrs FNINIT)>;
-def HWWriteResGroup181 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156]> {
- let Latency = 98;
- let NumMicroOps = 32;
- let ResourceCycles = [7,7,3,3,1,11];
-}
-def: InstRW<[HWWriteResGroup181], (instregex "DIV(16|32|64)r")>;
-
-def HWWriteResGroup182 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156]> {
- let Latency = 112;
- let NumMicroOps = 66;
- let ResourceCycles = [4,2,4,8,14,34];
-}
-def: InstRW<[HWWriteResGroup182], (instregex "IDIV(16|32|64)r")>;
-
def HWWriteResGroup183 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,HWPort237,HWPort06,HWPort0156]> {
let Latency = 115;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedPredicates.td b/contrib/llvm/lib/Target/X86/X86SchedPredicates.td
index 27aaeb193583..1c7f24375f61 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedPredicates.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedPredicates.td
@@ -19,11 +19,16 @@
// different zero-idioms.
def ZeroIdiomPredicate : CheckSameRegOperand<1, 2>;
-// A predicate used to check if an instruction is a LEA, and if it uses all
-// three source operands: base, index, and offset.
-def IsThreeOperandsLEAPredicate: CheckAll<[
- CheckOpcode<[LEA32r, LEA64r, LEA64_32r, LEA16r]>,
+// A predicate used to identify VPERM instructions that have bits 3 and 7 of their mask set.
+// On some processors, these VPERM instructions are zero-idioms.
+def ZeroIdiomVPERMPredicate : CheckAll<[
+ ZeroIdiomPredicate,
+ CheckImmOperand<3, 0x88>
+]>;
+// A predicate used to check if a LEA instruction uses all three source
+// operands: base, index, and offset.
+def IsThreeOperandsLEAPredicate: CheckAll<[
// isRegOperand(Base)
CheckIsRegOperand<1>,
CheckNot<CheckInvalidRegOperand<1>>,
@@ -42,8 +47,17 @@ def IsThreeOperandsLEAPredicate: CheckAll<[
]>
]>;
+def LEACases : MCOpcodeSwitchCase<
+ [LEA32r, LEA64r, LEA64_32r, LEA16r],
+ MCReturnStatement<IsThreeOperandsLEAPredicate>
+>;
+
+// Used to generate the body of a TII member function.
+def IsThreeOperandsLEABody :
+ MCOpcodeSwitchStatement<[LEACases], MCReturnStatement<FalsePred>>;
+
// This predicate evaluates to true only if the input machine instruction is a
// 3-operands LEA. Tablegen automatically generates a new method for it in
// X86GenInstrInfo.
def IsThreeOperandsLEAFn :
- TIIPredicate<"X86", "isThreeOperandsLEA", IsThreeOperandsLEAPredicate>;
+ TIIPredicate<"isThreeOperandsLEA", IsThreeOperandsLEABody>;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 6b7bbdea860a..9dbf0976989f 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -67,10 +67,16 @@ def SBDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def SBFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -109,11 +115,25 @@ def : WriteRes<WriteZero, []>;
// Arithmetic.
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>;
-defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
-defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul8, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul16, [SBPort1,SBPort05,SBPort015], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
+defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
+defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
+defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
+defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>;
defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
-defm : X86WriteRes<WriteBSWAP64, [SBPort1,SBPort05], 2, [1,1], 2>;
+defm : X86WriteRes<WriteBSWAP64, [SBPort1, SBPort05], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCMPXCHG, [SBPort05, SBPort015], 5, [1,3], 4>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[SBPort015, SBPort5, SBPort23, SBPort4], 8, [1, 2, 2, 1], 6>;
defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
@@ -124,15 +144,17 @@ defm : SBWriteResPair<WriteIDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
-
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>;
defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>;
defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
defm : X86WriteRes<WriteSHDmrcl,[SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;
-defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
+defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
+defm : SBWriteResPair<WriteShiftCL, [SBPort05], 3, [3], 3>;
+defm : SBWriteResPair<WriteRotate, [SBPort05], 2, [2], 2>;
+defm : SBWriteResPair<WriteRotateCL, [SBPort05], 3, [3], 3>;
+
defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;
@@ -144,8 +166,14 @@ def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [SBPort05]>;
-def : WriteRes<WriteBitTest,[SBPort05]>;
+
+defm : X86WriteRes<WriteLAHFSAHF, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SBPort05,SBPort23], 6, [1,1], 2>;
+//defm : X86WriteRes<WriteBitTestRegLd, [SBPort05,SBPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SBPort05,SBPort23], 6, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SBPort05,SBPort23,SBPort5,SBPort015], 8, [1,1,1,1], 5>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -159,10 +187,11 @@ defm : SBWriteResPair<WriteLZCNT, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteTZCNT, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WritePOPCNT, [SBPort1], 3, [1], 1, 6>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
// NOTE: These don't exist on Sandy Bridge. Ports are guesses.
defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>;
-defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
+defm : SBWriteResPair<WriteBLS, [SBPort015], 1>;
+defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [SBPort5], 1, [1], 1>;
@@ -577,21 +606,21 @@ def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr",
- "MMX_PADDQirr",
- "MMX_PALIGNRrri",
- "MMX_PSIGN(B|D|W)rr")>;
+def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
+ MMX_PABSDrr,
+ MMX_PABSWrr,
+ MMX_PADDQirr,
+ MMX_PALIGNRrri,
+ MMX_PSIGNBrr,
+ MMX_PSIGNDrr,
+ MMX_PSIGNWrr)>;
def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SBWriteResGroup9], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri",
- "SET(A|BE)r")>;
+def: InstRW<[SBWriteResGroup9], (instregex "SET(A|BE)r")>;
def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
let Latency = 2;
@@ -608,10 +637,7 @@ def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup12], (instregex "(V?)COMISDrr",
- "(V?)COMISSrr",
- "(V?)UCOMISDrr",
- "(V?)UCOMISSrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "(V?)(U?)COMI(SD|SS)rr")>;
def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
let Latency = 2;
@@ -626,22 +652,15 @@ def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ,
+ MMX_MOVDQ2Qrr)>;
def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup21], (instregex "PUSHFS64")>;
-
-def SBWriteResGroup21_16i : SchedWriteRes<[SBPort1, SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup21_16i], (instrs IMUL16rri, IMUL16rri8)>;
+def: InstRW<[SBWriteResGroup21], (instrs PUSHFS64)>;
def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
let Latency = 3;
@@ -650,25 +669,13 @@ def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
}
def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;
-def SBWriteResGroup23_2 : SchedWriteRes<[SBPort05]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SBWriteResGroup23_2], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
-
-def SBWriteResGroup25 : SchedWriteRes<[SBPort015]> {
+def SBWriteResGroup23 : SchedWriteRes<[SBPort05]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [3];
}
-def: InstRW<[SBWriteResGroup25], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
+def: InstRW<[SBWriteResGroup23], (instregex "RCL(8|16|32|64)r1",
+ "RCR(8|16|32|64)r1")>;
def SBWriteResGroup25_1 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 7;
@@ -684,33 +691,12 @@ def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
}
def: InstRW<[SBWriteResGroup26_2], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
-def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup27], (instrs IMUL64r, MUL64r)>;
-
-def SBWriteResGroup27_1 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup27_1], (instrs IMUL32r, MUL32r)>;
-
-def SBWriteResGroup27_2 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup27_2], (instrs IMUL16r, MUL16r)>;
-
def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
let Latency = 4;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>;
+def: InstRW<[SBWriteResGroup29], (instrs MOV64sr)>;
def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
let Latency = 4;
@@ -724,7 +710,6 @@ def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup30], (instregex "(V?)PCMPGTQrr")>;
def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
let Latency = 5;
@@ -734,6 +719,14 @@ def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
def: InstRW<[SBWriteResGroup31], (instregex "MOVSX(16|32|64)rm(8|16|32)",
"MOVZX(16|32|64)rm(8|16)")>;
+def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+ let ResourceCycles = [8];
+}
+def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)r(i|CL)",
+ "RCR(8|16|32|64)r(i|CL)")>;
+
def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
let Latency = 5;
let NumMicroOps = 2;
@@ -753,8 +746,8 @@ def SBWriteResGroup35_2 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m",
- "PUSHGS64")>;
+def: InstRW<[SBWriteResGroup35_2], (instrs PUSHGS64)>;
+def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m")>;
def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
let Latency = 5;
@@ -779,13 +772,6 @@ def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
}
def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>;
-def SBWriteResGroup42 : SchedWriteRes<[SBPort05,SBPort015]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let Latency = 3;
let NumMicroOps = 4;
@@ -820,9 +806,9 @@ def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup48], (instregex "MMX_MOVD64from64rm",
- "POP(16|32|64)r",
- "VBROADCASTSSrm",
+def: InstRW<[SBWriteResGroup48], (instrs MMX_MOVD64from64rm,
+ VBROADCASTSSrm)>;
+def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r",
"(V?)MOV64toPQIrm",
"(V?)MOVDDUPrm",
"(V?)MOVDI2PDIrm",
@@ -837,23 +823,20 @@ def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup49], (instregex "MOV16sm")>;
-
-def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort05]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup50], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup49], (instrs MOV16sm)>;
def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABS(B|D|W)rm",
- "MMX_PALIGNRrmi",
- "MMX_PSIGN(B|D|W)rm")>;
+def: InstRW<[SBWriteResGroup51], (instrs MMX_PABSBrm,
+ MMX_PABSDrm,
+ MMX_PABSWrm,
+ MMX_PALIGNRrmi,
+ MMX_PSIGNBrm,
+ MMX_PSIGNDrm,
+ MMX_PSIGNWrm)>;
def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 6;
@@ -875,11 +858,11 @@ def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instrs VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm)>;
def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 7;
@@ -893,14 +876,14 @@ def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>;
+def: InstRW<[SBWriteResGroup59], (instrs MMX_PADDQirm)>;
def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
let Latency = 7;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SBWriteResGroup62], (instregex "VER(R|W)m")>;
+def: InstRW<[SBWriteResGroup62], (instrs VERRm, VERWm)>;
def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 7;
@@ -944,15 +927,9 @@ def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
}
-def: InstRW<[SBWriteResGroup69], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 8;
@@ -961,12 +938,12 @@ def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>;
-def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+def SBWriteResGroup81 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2, 1];
}
-def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16)B")>;
def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 8;
@@ -990,10 +967,8 @@ def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let NumMicroOps = 5;
let ResourceCycles = [1,2,2];
}
-def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 8;
@@ -1015,36 +990,7 @@ def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)SD2SI(64)?rm",
- "CVT(T?)SS2SI(64)?rm")>;
-
-def SBWriteResGroup93_1 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_1], (instrs IMUL64m, MUL64m)>;
-
-def SBWriteResGroup93_2 : SchedWriteRes<[SBPort1,SBPort23,SBPort05,SBPort015]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_2], (instrs IMUL32m, MUL32m)>;
-
-def SBWriteResGroup93_3 : SchedWriteRes<[SBPort1,SBPort05,SBPort015,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SBWriteResGroup93_3], (instrs IMUL16m, MUL16m)>;
-
-def SBWriteResGroup93_4 : SchedWriteRes<[SBPort1,SBPort015,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_4], (instrs IMUL16rmi, IMUL16rmi8)>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)(SD|SS)2SI(64)?rm")>;
def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
let Latency = 9;
@@ -1092,10 +1038,7 @@ def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort05,SBPort
let NumMicroOps = 6;
let ResourceCycles = [1,1,2,1,1];
}
-def: InstRW<[SBWriteResGroup100], (instregex "BT(16|32|64)mr",
- "BTC(16|32|64)mr",
- "BTR(16|32|64)mr",
- "BTS(16|32|64)mr")>;
+def : SchedAlias<WriteBitTestRegLd, SBWriteResGroup100>; // TODO - this is incorrect - no RMW
def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
let Latency = 10;
@@ -1119,6 +1062,14 @@ def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup106], (instregex "FICOM(P?)(16|32)m")>;
+def SBWriteResGroup108 : SchedWriteRes<[SBPort05,SBPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 11;
+ let ResourceCycles = [7,4];
+}
+def: InstRW<[SBWriteResGroup108], (instregex "RCL(8|16|32|64)m",
+ "RCR(8|16|32|64)m")>;
+
def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 12;
let NumMicroOps = 2;
@@ -1154,6 +1105,71 @@ def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>;
+def SBWriteResGroupVzeroall : SchedWriteRes<[SBPort5]> {
+ let Latency = 9;
+ let NumMicroOps = 20;
+ let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>;
+
+def SBWriteResGroupVzeroupper : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 4;
+ let ResourceCycles = [];
+}
+def: InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>;
+
def: InstRW<[WriteZero], (instrs CLC)>;
+// Instruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and
+// renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def SBWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def SBWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[SBWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def SBWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogic]>
+]>;
+def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+ VXORPDrr)>;
+
+def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
+]>;
+def : InstRW<[SBWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def SBWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUX]>
+]>;
+def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+ PSUBDrr, VPSUBDrr,
+ PSUBQrr, VPSUBQrr,
+ PSUBWrr, VPSUBWrr,
+ PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [SBWriteResGroup30]>
+]>;
+def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;
+
} // SchedModel
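The zero-idiom variants added above all follow one template; the sketch below restates it with comments for reference (illustrative only — SBWriteVZeroIdiomSketch is a hypothetical name and is not part of this commit):

def SBWriteVZeroIdiomSketch : SchedWriteVariant<[
  // Zero idiom (e.g. xor/sub of a register with itself): the renamer resolves
  // the result, no execution port is consumed, so the zero-latency write applies.
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
  // Any other operand combination falls back to the normal write class.
  SchedVar<NoSchedPred, [WriteVecLogicX]>
]>;
// The variant is then attached with InstRW, exactly as for SBWriteVZeroIdiomLogicX above.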
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index bda088e1512f..2c9eb7516085 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -71,10 +71,16 @@ def SKLPortAny : ProcResGroup<[SKLPort0, SKLPort1, SKLPort2, SKLPort3, SKLPort4,
let BufferSize=60;
}
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
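To make the interaction concrete, here is a minimal sketch of a folded-load write paired with one of the new vector reads (illustrative only — SKLWriteVecALUXLd_Sketch is a hypothetical name; its numbers mirror SKLWriteResGroup91, which a later hunk tags with ReadAfterVecXLd):

def SKLWriteVecALUXLd_Sketch : SchedWriteRes<[SKLPort23,SKLPort015]> {
  let Latency = 7;               // 6cy 128-bit vector load + 1cy ALU
  let NumMicroOps = 2;           // micro-fused load uop + ALU uop
  let ResourceCycles = [1,1];
}
// ReadAfterVecXLd (ReadAdvance = 6 above) lets the vector register operand
// arrive up to 6 cycles after the load uop issues, so a dependent chain
// through that register sees roughly the 1-cycle ALU latency, not the full 7.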
@@ -107,24 +113,47 @@ def : WriteRes<WriteRMW, [SKLPort237,SKLPort4]>;
// Arithmetic.
defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op.
defm : SKLWriteResPair<WriteADC, [SKLPort06], 1>; // Integer ALU + flags op.
-defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
-defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication.
+
+// Integer multiplication.
+defm : SKLWriteResPair<WriteIMul8, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul16, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SKLPort1,SKLPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
+defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>;
-
-defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteDiv32, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv32, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteCMPXCHG,[SKLPort06, SKLPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[SKLPort23,SKLPort06,SKLPort0156,SKLPort237,SKLPort4], 8, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [SKLPort0156], 2, [3], 3>;
+
+// TODO: Why isn't the SKLDivider used?
+defm : SKLWriteResPair<WriteDiv8, [SKLPort0,SKLDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteDiv16, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv16Ld, [SKLPort0,SKLPort23,SKLDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [SKLPort0,SKLPort23,SKLDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [SKLPort0,SKLPort23,SKLDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [SKLPort0,SKLDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads.
defm : SKLWriteResPair<WriteCMOV, [SKLPort06], 1, [1], 1>; // Conditional move.
@@ -135,8 +164,14 @@ def : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [SKLPort06]>;
-def : WriteRes<WriteBitTest,[SKLPort06]>; //
+
+defm : X86WriteRes<WriteLAHFSAHF, [SKLPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SKLPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SKLPort06,SKLPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [SKLPort0156,SKLPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [SKLPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SKLPort06,SKLPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SKLPort0156,SKLPort23], 5, [1,1], 2>;
// Bit counts.
defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>;
@@ -146,7 +181,10 @@ defm : SKLWriteResPair<WriteTZCNT, [SKLPort1], 3>;
defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>;
// Integer shifts and rotates.
-defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
+defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
+defm : SKLWriteResPair<WriteShiftCL, [SKLPort06], 3, [3], 3>;
+defm : SKLWriteResPair<WriteRotate, [SKLPort06], 2, [2], 2>;
+defm : SKLWriteResPair<WriteRotateCL, [SKLPort06], 3, [3], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKLPort1], 3, [1], 1>;
@@ -154,9 +192,10 @@ defm : X86WriteRes<WriteSHDrrcl,[SKLPort1,SKLPort06,SKLPort0156], 6, [1, 2, 1],
defm : X86WriteRes<WriteSHDmri, [SKLPort1,SKLPort23,SKLPort237,SKLPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156], 11, [1, 1, 1, 2, 1], 6>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>;
-defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
+defm : SKLWriteResPair<WriteBLS, [SKLPort15], 1>;
+defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [SKLPort23], 5, [1], 1>;
@@ -612,10 +651,7 @@ def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup8], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[SKLWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> {
let Latency = 1;
@@ -632,47 +668,42 @@ def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup10], (instrs CBW, CWDE, CDQE,
- CMC, STC)>;
-def: InstRW<[SKLWriteResGroup10], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> {
let Latency = 1;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm",
- "ST_FP(32|64|80)m",
- "VMPTRSTm")>;
+def: InstRW<[SKLWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP(32|64|80)m")>;
def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[SKLWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP)>;
-def: InstRW<[SKLWriteResGroup14], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP,
+ MMX_MOVDQ2Qrr)>;
def SKLWriteResGroup15 : SchedWriteRes<[SKLPort06]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup15], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri",
- "SET(A|BE)r")>;
+def: InstRW<[SKLWriteResGroup15], (instregex "SET(A|BE)r")>;
def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> {
let Latency = 2;
@@ -702,11 +733,10 @@ def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup23], (instrs CWD)>;
-def: InstRW<[SKLWriteResGroup23], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[SKLWriteResGroup23], (instregex "ADC8i8",
- "ADC8ri",
- "SBB8i8",
+def: InstRW<[SKLWriteResGroup23], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8)>;
+def: InstRW<[SKLWriteResGroup23], (instregex "ADC8ri",
"SBB8ri")>;
def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
@@ -728,10 +758,9 @@ def SKLWriteResGroup28 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[SKLWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
let Latency = 3;
@@ -741,21 +770,13 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
-def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup29_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
- "VPBROADCASTBrr",
- "VPBROADCASTWrr",
+ "VPBROADCAST(B|W)rr",
"(V?)PCMPGTQ(Y?)rr")>;
def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
@@ -765,26 +786,6 @@ def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup32], (instrs FNSTSW16r)>;
-def SKLWriteResGroup33 : SchedWriteRes<[SKLPort06]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKLWriteResGroup33], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
-
-def SKLWriteResGroup34 : SchedWriteRes<[SKLPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKLWriteResGroup34], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
-
def SKLWriteResGroup35 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -805,9 +806,9 @@ def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKLWriteResGroup39], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[SKLWriteResGroup39], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def SKLWriteResGroup40 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 3;
@@ -828,10 +829,8 @@ def SKLWriteResGroup42 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[SKLWriteResGroup42], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
+def: InstRW<[SKLWriteResGroup42], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> {
let Latency = 3;
@@ -876,20 +875,6 @@ def SKLWriteResGroup48 : SchedWriteRes<[SKLPort01]> {
def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr",
"(V?)CVT(T?)PS2DQ(Y?)rr")>;
-def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup51], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def SKLWriteResGroup51_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKLWriteResGroup51_16], (instrs IMUL16r, MUL16r)>;
-
def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -912,10 +897,10 @@ def SKLWriteResGroup55 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup55], (instrs PAUSE)>;
-def SKLWriteResGroup56 : SchedWriteRes<[SKLPort015,SKLPort0156]> {
- let Latency = 4;
+def SKLWriteResGroup56 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[SKLWriteResGroup56], (instrs VZEROUPPER)>;
@@ -931,11 +916,8 @@ def SKLWriteResGroup58 : SchedWriteRes<[SKLPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
+def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
"(V?)MOVDDUPrm")>; // TODO: Should this be SKLWriteResGroup67?
def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> {
@@ -943,8 +925,9 @@ def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup59], (instregex "MMX_CVTPI2PDirr",
- "(V?)CVTDQ2PDrr")>;
+def: InstRW<[SKLWriteResGroup59], (instrs MMX_CVTPI2PDirr,
+ CVTDQ2PDrr,
+ VCVTDQ2PDrr)>;
def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> {
let Latency = 5;
@@ -969,13 +952,6 @@ def SKLWriteResGroup61 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> {
}
def: InstRW<[SKLWriteResGroup61], (instregex "STR(16|32|64)r")>;
-def SKLWriteResGroup62 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup62], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -983,13 +959,6 @@ def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup63], (instrs XSETBV)>;
-def SKLWriteResGroup64 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[SKLWriteResGroup64], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def SKLWriteResGroup65 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
@@ -1002,44 +971,44 @@ def SKLWriteResGroup67 : SchedWriteRes<[SKLPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup67], (instregex "VBROADCASTSSrm",
- "(V?)MOVSHDUPrm",
- "(V?)MOVSLDUPrm",
- "VPBROADCASTDrm",
- "VPBROADCASTQrm")>;
+def: InstRW<[SKLWriteResGroup67], (instrs VBROADCASTSSrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm)>;
+def: InstRW<[SKLWriteResGroup67], (instregex "(V?)MOVSHDUPrm",
+ "(V?)MOVSLDUPrm")>;
def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup68], (instregex "MMX_CVTPI2PSirr")>;
+def: InstRW<[SKLWriteResGroup68], (instrs MMX_CVTPI2PSirr)>;
def SKLWriteResGroup69 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDSBirm",
- "MMX_PADDSWirm",
- "MMX_PADDUSBirm",
- "MMX_PADDUSWirm",
- "MMX_PAVGBirm",
- "MMX_PAVGWirm",
- "MMX_PCMPEQBirm",
- "MMX_PCMPEQDirm",
- "MMX_PCMPEQWirm",
- "MMX_PCMPGTBirm",
- "MMX_PCMPGTDirm",
- "MMX_PCMPGTWirm",
- "MMX_PMAXSWirm",
- "MMX_PMAXUBirm",
- "MMX_PMINSWirm",
- "MMX_PMINUBirm",
- "MMX_PSUBSBirm",
- "MMX_PSUBSWirm",
- "MMX_PSUBUSBirm",
- "MMX_PSUBUSWirm")>;
+def: InstRW<[SKLWriteResGroup69], (instrs MMX_PADDSBirm,
+ MMX_PADDSWirm,
+ MMX_PADDUSBirm,
+ MMX_PADDUSWirm,
+ MMX_PAVGBirm,
+ MMX_PAVGWirm,
+ MMX_PCMPEQBirm,
+ MMX_PCMPEQDirm,
+ MMX_PCMPEQWirm,
+ MMX_PCMPGTBirm,
+ MMX_PCMPGTDirm,
+ MMX_PCMPGTWirm,
+ MMX_PMAXSWirm,
+ MMX_PMAXUBirm,
+ MMX_PMINSWirm,
+ MMX_PMINUBirm,
+ MMX_PSUBSBirm,
+ MMX_PSUBSWirm,
+ MMX_PSUBUSBirm,
+ MMX_PSUBUSWirm)>;
def SKLWriteResGroup70 : SchedWriteRes<[SKLPort0,SKLPort01]> {
let Latency = 6;
@@ -1054,15 +1023,8 @@ def SKLWriteResGroup72 : SchedWriteRes<[SKLPort6,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup72], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def SKLWriteResGroup74 : SchedWriteRes<[SKLPort23,SKLPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup74], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[SKLWriteResGroup72], (instrs FARJMP64)>;
+def: InstRW<[SKLWriteResGroup72], (instregex "JMP(16|32|64)m")>;
def SKLWriteResGroup75 : SchedWriteRes<[SKLPort23,SKLPort15]> {
let Latency = 6;
@@ -1070,9 +1032,6 @@ def SKLWriteResGroup75 : SchedWriteRes<[SKLPort23,SKLPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup75], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def SKLWriteResGroup76 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
@@ -1102,15 +1061,9 @@ def SKLWriteResGroup82 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[SKLWriteResGroup82], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup82], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def SKLWriteResGroup83 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 6;
@@ -1132,23 +1085,23 @@ def SKLWriteResGroup85 : SchedWriteRes<[SKLPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup85], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
- "VPBROADCASTDYrm",
- "VPBROADCASTQYrm")>;
+def: InstRW<[SKLWriteResGroup85], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[SKLWriteResGroup85], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>;
+def: InstRW<[SKLWriteResGroup86], (instrs VCVTDQ2PDYrr)>;
def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 6;
@@ -1167,19 +1120,21 @@ def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPD2PSYrr",
- "VCVTPS2PDYrr",
- "VCVT(T?)PD2DQYrr")>;
+def: InstRW<[SKLWriteResGroup89], (instrs VCVTPD2PSYrr,
+ VCVTPS2PDYrr,
+ VCVTPD2DQYrr,
+ VCVTTPD2DQYrr)>;
def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup91], (instregex "(V?)INSERTF128rm",
- "(V?)INSERTI128rm",
- "(V?)PADD(B|D|Q|W)rm",
- "(V?)PBLENDDrmi",
+def: InstRW<[SKLWriteResGroup91], (instrs VINSERTF128rm,
+ VINSERTI128rm,
+ VPBLENDDrmi)>;
+def: InstRW<[SKLWriteResGroup91, ReadAfterVecXLd],
+ (instregex "(V?)PADD(B|D|Q|W)rm",
"(V?)PSUB(B|D|Q|W)rm")>;
def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> {
@@ -1187,9 +1142,9 @@ def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKLWriteResGroup92], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[SKLWriteResGroup92], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def SKLWriteResGroup94 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
let Latency = 7;
@@ -1225,10 +1180,8 @@ def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def SKLWriteResGroup101 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 7;
@@ -1242,8 +1195,8 @@ def SKLWriteResGroup102 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[SKLWriteResGroup102], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[SKLWriteResGroup102], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[SKLWriteResGroup102], (instrs FARCALL64)>;
def SKLWriteResGroup103 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 7;
@@ -1260,39 +1213,26 @@ def SKLWriteResGroup107 : SchedWriteRes<[SKLPort1,SKLPort23]> {
def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
-def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup107_16], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort06, SKLPort0156, SKLPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SKLWriteResGroup107_16_2], (instrs IMUL16m, MUL16m)>;
-
def SKLWriteResGroup108 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup108], (instregex "FCOM(P?)(32|64)m",
- "VPBROADCASTBYrm",
- "VPBROADCASTWYrm",
- "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXWQYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "FCOM(P?)(32|64)m")>;
+def: InstRW<[SKLWriteResGroup108], (instrs VPBROADCASTBYrm,
+ VPBROADCASTWYrm,
+ VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup110], (instregex "VPADD(B|D|Q|W)Yrm",
- "VPBLENDDYrmi",
+def: InstRW<[SKLWriteResGroup110], (instrs VPBLENDDYrmi)>;
+def: InstRW<[SKLWriteResGroup110, ReadAfterVecYLd],
+ (instregex "VPADD(B|D|Q|W)Yrm",
"VPSUB(B|D|Q|W)Yrm")>;
def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
@@ -1302,22 +1242,13 @@ def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
}
def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKLWriteResGroup115 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKLWriteResGroup115], (instregex "ROR(8|16|32|64)mCL")>;
-
def SKLWriteResGroup116 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKLWriteResGroup116], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup116], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def SKLWriteResGroup117 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
let Latency = 8;
@@ -1325,6 +1256,7 @@ def SKLWriteResGroup117 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
let ResourceCycles = [1,1,1,3];
}
def: InstRW<[SKLWriteResGroup117], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1335,25 +1267,25 @@ def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
let ResourceCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, SKLWriteResGroup119>;
-def: InstRW<[SKLWriteResGroup119], (instregex "CMPXCHG(8|16|32|64)rm")>;
def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup120], (instregex "MMX_CVTPI2PSirm")>;
+def: InstRW<[SKLWriteResGroup120], (instrs MMX_CVTPI2PSirm)>;
def SKLWriteResGroup121 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup121], (instregex "(V?)PCMPGTQrm",
- "VPMOVSXBWYrm",
- "VPMOVSXDQYrm",
- "VPMOVSXWDYrm",
- "VPMOVZXWDYrm")>;
+def: InstRW<[SKLWriteResGroup121], (instrs PCMPGTQrm,
+ VPCMPGTQrm,
+ VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let Latency = 9;
@@ -1363,13 +1295,6 @@ def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIirm",
"(V?)CVTPS2PDrm")>;
-def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup127], (instrs IMUL64m, MUL64m, MULX64rm)>;
-
def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -1392,8 +1317,8 @@ def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
- "ILD_F(16|32|64)m",
- "VPCMPGTQYrm")>;
+ "ILD_F(16|32|64)m")>;
+def: InstRW<[SKLWriteResGroup133], (instrs VPCMPGTQYrm)>;
def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 10;
@@ -1410,7 +1335,7 @@ def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup138], (instregex "MMX_CVTPI2PDirm")>;
+def: InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDirm)>;
def SKLWriteResGroup139 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 10;
@@ -1424,15 +1349,8 @@ def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
-def: InstRW<[SKLWriteResGroup140], (instregex "VPHADDSWYrm",
- "VPHSUBSWYrm")>;
-
-def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup142], (instrs IMUL32m, MUL32m, MULX32rm)>;
+def: InstRW<[SKLWriteResGroup140], (instrs VPHADDSWYrm,
+ VPHSUBSWYrm)>;
def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 10;
@@ -1460,9 +1378,10 @@ def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup147], (instregex "VCVTDQ2PSYrm",
- "VCVTPS2PDYrm",
- "VCVT(T?)PS2DQYrm")>;
+def: InstRW<[SKLWriteResGroup147], (instrs VCVTDQ2PSYrm,
+ VCVTPS2PDYrm,
+ VCVTPS2DQYrm,
+ VCVTTPS2DQYrm)>;
def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 11;
@@ -1493,9 +1412,11 @@ def SKLWriteResGroup152 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVT(T?)PD2PIirm")>;
+def: InstRW<[SKLWriteResGroup152], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm)>;
def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
let Latency = 11;
@@ -1510,7 +1431,7 @@ def SKLWriteResGroup155 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort015
let NumMicroOps = 9;
let ResourceCycles = [1,5,1,2];
}
-def: InstRW<[SKLWriteResGroup155], (instregex "RCL8rCL")>;
+def: InstRW<[SKLWriteResGroup155], (instrs RCL8rCL)>;
def SKLWriteResGroup156 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 11;
@@ -1538,7 +1459,7 @@ def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup163], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[SKLWriteResGroup163], (instrs VCVTDQ2PDYrm)>;
def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
let Latency = 14;
@@ -1567,7 +1488,7 @@ def SKLWriteResGroup170 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort015
let NumMicroOps = 10;
let ResourceCycles = [2,4,1,3];
}
-def: InstRW<[SKLWriteResGroup170], (instregex "RCR8rCL")>;
+def: InstRW<[SKLWriteResGroup170], (instrs RCR8rCL)>;
def SKLWriteResGroup171 : SchedWriteRes<[SKLPort0]> {
let Latency = 15;
@@ -1723,13 +1644,6 @@ def SKLWriteResGroup206 : SchedWriteRes<[SKLPort0,SKLPort23]> {
}
def: InstRW<[SKLWriteResGroup206], (instregex "DIVR_F(32|64)m")>;
-def SKLWriteResGroup207 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> {
- let Latency = 28;
- let NumMicroOps = 8;
- let ResourceCycles = [2,4,1,1];
-}
-def: InstRW<[SKLWriteResGroup207], (instregex "IDIV(8|16|32|64)m")>;
-
def SKLWriteResGroup208 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 30;
let NumMicroOps = 3;
@@ -1824,20 +1738,6 @@ def SKLWriteResGroup220 : SchedWriteRes<[SKLPort5,SKLPort05,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup220], (instrs FNINIT)>;
-def SKLWriteResGroup221 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
- let Latency = 76;
- let NumMicroOps = 32;
- let ResourceCycles = [7,2,8,3,1,11];
-}
-def: InstRW<[SKLWriteResGroup221], (instregex "DIV(16|32|64)r")>;
-
-def SKLWriteResGroup222 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
- let Latency = 102;
- let NumMicroOps = 66;
- let ResourceCycles = [4,2,4,8,14,34];
-}
-def: InstRW<[SKLWriteResGroup222], (instregex "IDIV(16|32|64)r")>;
-
def SKLWriteResGroup223 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 9d5f8555c505..ec8e4db02d8a 100755
--- a/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -71,10 +71,16 @@ def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4,
let BufferSize=60;
}
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
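The hunk that follows rebuilds the multiply and divide classes from the X86WriteRes helper. As a reading aid (an assumption, since the helper's definition lies outside this diff), each such defm is treated here as shorthand for the explicit WriteRes form already used in these files, e.g. for the new WriteDiv16Ld entry:

def : WriteRes<WriteDiv16Ld, [SKXPort0,SKXPort23,SKXDivider]> {
  let Latency = 29;              // total latency in cycles
  let ResourceCycles = [1,1,10]; // cycles held on port 0, port 23 and the divider
  let NumMicroOps = 2;
}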
@@ -107,24 +113,48 @@ def : WriteRes<WriteRMW, [SKXPort237,SKXPort4]>;
// Arithmetic.
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op.
-defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
-defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication.
+
+// Integer multiplication.
+defm : SKXWriteResPair<WriteIMul8, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul16, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SKXPort1,SKXPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
+defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
+defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
-
-defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteDiv32, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv32, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteCMPXCHG,[SKXPort06, SKXPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[SKXPort23,SKXPort06,SKXPort0156,SKXPort237,SKXPort4], 8, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [SKXPort0156], 2, [3], 3>;
+
+// TODO: Why isn't the SKXDivider used?
+defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv16Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.
defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move.
@@ -135,11 +165,19 @@ def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [SKXPort06]>;
-def : WriteRes<WriteBitTest,[SKXPort06]>; //
+defm : X86WriteRes<WriteLAHFSAHF, [SKXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SKXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SKXPort06,SKXPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [SKXPort0156,SKXPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [SKXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SKXPort06,SKXPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156,SKXPort23], 5, [1,1], 2>;
// Integer shifts and rotates.
-defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
+defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
+defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>;
+defm : SKXWriteResPair<WriteRotate, [SKXPort06], 2, [2], 2>;
+defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
@@ -154,9 +192,10 @@ defm : SKXWriteResPair<WriteLZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteTZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WritePOPCNT, [SKXPort1], 3>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>;
-defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
+defm : SKXWriteResPair<WriteBLS, [SKXPort15], 1>;
+defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
@@ -625,10 +664,7 @@ def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
let Latency = 1;
@@ -655,48 +691,43 @@ def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup10], (instrs CBW, CWDE, CDQE,
- CMC, STC)>;
-def: InstRW<[SKXWriteResGroup10], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
let Latency = 1;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm",
- "KMOV(B|D|Q|W)mk",
- "ST_FP(32|64|80)m",
- "VMPTRSTm")>;
+def: InstRW<[SKXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[SKXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
+ "ST_FP(32|64|80)m")>;
def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[SKXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP)>;
-def: InstRW<[SKXWriteResGroup14], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP,
+ MMX_MOVDQ2Qrr)>;
def SKXWriteResGroup15 : SchedWriteRes<[SKXPort06]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup15], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri",
- "SET(A|BE)r")>;
+def: InstRW<[SKXWriteResGroup15], (instregex "SET(A|BE)r")>;
def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
let Latency = 2;
@@ -726,11 +757,10 @@ def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup23], (instrs CWD)>;
-def: InstRW<[SKXWriteResGroup23], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[SKXWriteResGroup23], (instregex "ADC8i8",
- "ADC8ri",
- "SBB8i8",
+def: InstRW<[SKXWriteResGroup23], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8)>;
+def: InstRW<[SKXWriteResGroup23], (instregex "ADC8ri",
"SBB8ri")>;
def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
@@ -752,10 +782,9 @@ def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[SKXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
let Latency = 2;
@@ -781,39 +810,26 @@ def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
-def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup31_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
-
def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
+def: InstRW<[SKXWriteResGroup32], (instrs VPSADBWZrr)>; // TODO: 512-bit ops require ports 0/1 to be joined.
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
"KADD(B|D|Q|W)rr",
"KSHIFTL(B|D|Q|W)ri",
"KSHIFTR(B|D|Q|W)ri",
- "KUNPCKBWrr",
- "KUNPCKDQrr",
- "KUNPCKWDrr",
+ "KUNPCK(BW|DQ|WD)rr",
"VALIGND(Z|Z128|Z256)rri",
"VALIGNQ(Z|Z128|Z256)rri",
"VCMPPD(Z|Z128|Z256)rri",
"VCMPPS(Z|Z128|Z256)rri",
- "VCMPSDZrr",
- "VCMPSSZrr",
+ "VCMP(SD|SS)Zrr",
"VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
- "VFPCLASSPD(Z|Z128|Z256)rr",
- "VFPCLASSPS(Z|Z128|Z256)rr",
- "VFPCLASSSDZrr",
- "VFPCLASSSSZrr",
- "VPBROADCASTBrr",
- "VPBROADCASTWrr",
+ "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
+ "VFPCLASS(SD|SS)Zrr",
+ "VPBROADCAST(B|W)rr",
"VPCMPB(Z|Z128|Z256)rri",
"VPCMPD(Z|Z128|Z256)rri",
"VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
@@ -823,7 +839,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
"VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
"VPCMPW(Z|Z128|Z256)rri",
"VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr",
- "VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined.
"VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
@@ -833,26 +848,6 @@ def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup34], (instrs FNSTSW16r)>;
-def SKXWriteResGroup35 : SchedWriteRes<[SKXPort06]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKXWriteResGroup35], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
-
-def SKXWriteResGroup36 : SchedWriteRes<[SKXPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKXWriteResGroup36], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
-
def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0,SKXPort5]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -872,9 +867,9 @@ def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKXWriteResGroup41], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[SKXWriteResGroup41], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 3;
@@ -895,10 +890,8 @@ def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[SKXWriteResGroup44], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
+def: InstRW<[SKXWriteResGroup44], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
let Latency = 3;
@@ -1000,20 +993,6 @@ def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
"VPMOVUSWB(Z|Z128|Z256)rr",
"VPMOVWB(Z|Z128|Z256)rr")>;
-def SKXWriteResGroup52 : SchedWriteRes<[SKXPort1,SKXPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup52], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKXWriteResGroup52_16], (instrs IMUL16r, MUL16r)>;
-
def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -1030,10 +1009,10 @@ def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>;
-def SKXWriteResGroup56 : SchedWriteRes<[SKXPort015,SKXPort0156]> {
- let Latency = 4;
+def SKXWriteResGroup56 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>;
@@ -1049,11 +1028,8 @@ def SKXWriteResGroup58 : SchedWriteRes<[SKXPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
+def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
"(V?)MOVDDUPrm")>; // TODO: Should this be SKXWriteResGroup71?
def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> {
@@ -1104,13 +1080,6 @@ def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> {
}
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;
-def SKXWriteResGroup64 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup64], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> {
let Latency = 5;
let NumMicroOps = 3;
@@ -1150,13 +1119,6 @@ def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup67], (instrs XSETBV)>;
-def SKXWriteResGroup68 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[SKXWriteResGroup68], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
@@ -1169,19 +1131,21 @@ def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup71], (instregex "VBROADCASTSSrm",
- "(V?)MOVSHDUPrm",
- "(V?)MOVSLDUPrm",
- "VPBROADCASTDrm",
- "VPBROADCASTQrm")>;
+def: InstRW<[SKXWriteResGroup71], (instrs VBROADCASTSSrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm,
+ VMOVSHDUPrm,
+ VMOVSLDUPrm,
+ MOVSHDUPrm,
+ MOVSLDUPrm)>;
def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup72], (instregex "MMX_CVTPI2PSirr",
- "VCOMPRESSPD(Z|Z128|Z256)rr",
+def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
"VCOMPRESSPS(Z|Z128|Z256)rr",
"VPCOMPRESSD(Z|Z128|Z256)rr",
"VPCOMPRESSQ(Z|Z128|Z256)rr",
@@ -1192,41 +1156,34 @@ def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDSBirm",
- "MMX_PADDSWirm",
- "MMX_PADDUSBirm",
- "MMX_PADDUSWirm",
- "MMX_PAVGBirm",
- "MMX_PAVGWirm",
- "MMX_PCMPEQBirm",
- "MMX_PCMPEQDirm",
- "MMX_PCMPEQWirm",
- "MMX_PCMPGTBirm",
- "MMX_PCMPGTDirm",
- "MMX_PCMPGTWirm",
- "MMX_PMAXSWirm",
- "MMX_PMAXUBirm",
- "MMX_PMINSWirm",
- "MMX_PMINUBirm",
- "MMX_PSUBSBirm",
- "MMX_PSUBSWirm",
- "MMX_PSUBUSBirm",
- "MMX_PSUBUSWirm")>;
+def: InstRW<[SKXWriteResGroup73], (instrs MMX_PADDSBirm,
+ MMX_PADDSWirm,
+ MMX_PADDUSBirm,
+ MMX_PADDUSWirm,
+ MMX_PAVGBirm,
+ MMX_PAVGWirm,
+ MMX_PCMPEQBirm,
+ MMX_PCMPEQDirm,
+ MMX_PCMPEQWirm,
+ MMX_PCMPGTBirm,
+ MMX_PCMPGTDirm,
+ MMX_PCMPGTWirm,
+ MMX_PMAXSWirm,
+ MMX_PMAXUBirm,
+ MMX_PMINSWirm,
+ MMX_PMINUBirm,
+ MMX_PSUBSBirm,
+ MMX_PSUBSWirm,
+ MMX_PSUBUSBirm,
+ MMX_PSUBUSWirm)>;
def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup76], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def SKXWriteResGroup78 : SchedWriteRes<[SKXPort23,SKXPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup78], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64)>;
+def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
let Latency = 6;
@@ -1234,9 +1191,6 @@ def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup79], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
@@ -1244,8 +1198,8 @@ def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)",
- "VMOVDI2PDIZrm(b?)")>;
+def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
+def: InstRW<[SKXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
let Latency = 6;
@@ -1276,15 +1230,9 @@ def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[SKXWriteResGroup86], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[SKXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def SKXWriteResGroup87 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 6;
@@ -1306,23 +1254,23 @@ def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
- "VPBROADCASTDYrm",
- "VPBROADCASTQYrm")>;
+def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[SKXWriteResGroup89], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup90], (instregex "VCVTDQ2PDYrr")>;
+def: InstRW<[SKXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;
def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 7;
@@ -1389,12 +1337,14 @@ def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)",
+def: InstRW<[SKXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
+ VPBLENDDrmi)>;
+def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
+ (instregex "VBLENDMPDZ128rm(b?)",
"VBLENDMPSZ128rm(b?)",
"VBROADCASTI32X2Z128m(b?)",
"VBROADCASTSSZ128m(b?)",
- "VINSERTF128rm",
- "VINSERTI128rm",
+ "VINSERT(F|I)128rm",
"VMOVAPDZ128rm(b?)",
"VMOVAPSZ128rm(b?)",
"VMOVDDUPZ128rm(b?)",
@@ -1404,14 +1354,12 @@ def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)",
"VMOVDQU32Z128rm(b?)",
"VMOVDQU64Z128rm(b?)",
"VMOVDQU8Z128rm(b?)",
- "VMOVNTDQAZ128rm(b?)",
"VMOVSHDUPZ128rm(b?)",
"VMOVSLDUPZ128rm(b?)",
"VMOVUPDZ128rm(b?)",
"VMOVUPSZ128rm(b?)",
"VPADD(B|D|Q|W)Z128rm(b?)",
"(V?)PADD(B|D|Q|W)rm",
- "VPBLENDDrmi",
"VPBLENDM(B|D|Q|W)Z128rm(b?)",
"VPBROADCASTDZ128m(b?)",
"VPBROADCASTQZ128m(b?)",
@@ -1425,9 +1373,9 @@ def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKXWriteResGroup96], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 7;
@@ -1495,10 +1443,8 @@ def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 7;
@@ -1512,8 +1458,8 @@ def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64)>;
def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
@@ -1567,20 +1513,6 @@ def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> {
def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
-def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup118_16_1], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>;
-
-def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort06, SKXPort0156, SKXPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SKXWriteResGroup118_16_2], (instrs IMUL16m, MUL16m)>;
-
def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
@@ -1588,20 +1520,23 @@ def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
"VFPCLASSSDZrm(b?)",
- "VPBROADCASTBYrm",
"VPBROADCASTB(Z|Z256)m(b?)",
- "VPBROADCASTWYrm",
- "VPBROADCASTW(Z|Z256)m(b?)",
- "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXWQYrm")>;
+ "VPBROADCASTW(Z|Z256)m(b?)")>;
+def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm,
+ VPBROADCASTWYrm,
+ VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPD(Z|Z256)rm(b?)",
+def: InstRW<[SKXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
+ VPBLENDDYrmi)>;
+def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
+ (instregex "VBLENDMPD(Z|Z256)rm(b?)",
"VBLENDMPS(Z|Z256)rm(b?)",
"VBROADCASTF32X2Z256m(b?)",
"VBROADCASTF32X2Zm(b?)",
@@ -1638,14 +1573,12 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPD(Z|Z256)rm(b?)",
"VMOVDQU32(Z|Z256)rm(b?)",
"VMOVDQU64(Z|Z256)rm(b?)",
"VMOVDQU8(Z|Z256)rm(b?)",
- "VMOVNTDQAZ256rm(b?)",
"VMOVSHDUP(Z|Z256)rm(b?)",
"VMOVSLDUP(Z|Z256)rm(b?)",
"VMOVUPD(Z|Z256)rm(b?)",
"VMOVUPS(Z|Z256)rm(b?)",
"VPADD(B|D|Q|W)Yrm",
"VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
- "VPBLENDDYrmi",
"VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
"VPBROADCASTD(Z|Z256)m(b?)",
"VPBROADCASTQ(Z|Z256)m(b?)",
@@ -1661,22 +1594,13 @@ def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKXWriteResGroup126 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKXWriteResGroup126], (instregex "ROR(8|16|32|64)mCL")>;
-
def SKXWriteResGroup127 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
+def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 8;
@@ -1684,6 +1608,7 @@ def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
let ResourceCycles = [1,1,1,3];
}
def: InstRW<[SKXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1694,7 +1619,6 @@ def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
let ResourceCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, SKXWriteResGroup130>;
-def: InstRW<[SKXWriteResGroup130], (instregex "CMPXCHG(8|16|32|64)rm")>;
def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
@@ -1734,19 +1658,20 @@ def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup135], (instregex "MMX_CVTPI2PSirm")>;
+def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSirm)>;
def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i",
- "VALIGNQZ128rm(b?)i",
- "VCMPPDZ128rm(b?)i",
- "VCMPPSZ128rm(b?)i",
- "VCMPSDZrm",
- "VCMPSSZrm",
+def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
+def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
+ "VCMP(PD|PS)Z128rm(b?)i",
+ "VCMP(SD|SS)Zrm",
"VFPCLASSSSZrm(b?)",
"VPCMPBZ128rmi(b?)",
"VPCMPDZ128rmi(b?)",
@@ -1770,18 +1695,14 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i",
"VPMINUQZ128rm(b?)",
"VPMOVSXBDZ128rm(b?)",
"VPMOVSXBQZ128rm(b?)",
- "VPMOVSXBWYrm",
"VPMOVSXBWZ128rm(b?)",
- "VPMOVSXDQYrm",
"VPMOVSXDQZ128rm(b?)",
- "VPMOVSXWDYrm",
"VPMOVSXWDZ128rm(b?)",
"VPMOVSXWQZ128rm(b?)",
"VPMOVZXBDZ128rm(b?)",
"VPMOVZXBQZ128rm(b?)",
"VPMOVZXBWZ128rm(b?)",
"VPMOVZXDQZ128rm(b?)",
- "VPMOVZXWDYrm",
"VPMOVZXWDZ128rm(b?)",
"VPMOVZXWQZ128rm(b?)",
"VPTESTMBZ128rm(b?)",
@@ -1801,13 +1722,6 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
"(V?)CVTPS2PDrm")>;
-def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup142], (instrs IMUL64m, MUL64m, MULX64rm)>;
-
def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -1829,6 +1743,7 @@ def SKXWriteResGroup148 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
+def: InstRW<[SKXWriteResGroup148], (instrs VPCMPGTQYrm)>;
def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
"VALIGND(Z|Z256)rm(b?)i",
@@ -1843,7 +1758,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPCMPEQW(Z|Z256)rm(b?)",
"VPCMPGTB(Z|Z256)rm(b?)",
"VPCMPGTD(Z|Z256)rm(b?)",
- "VPCMPGTQYrm",
"VPCMPGTQ(Z|Z256)rm(b?)",
"VPCMPGTW(Z|Z256)rm(b?)",
"VPCMPQ(Z|Z256)rmi(b?)",
@@ -1914,15 +1828,8 @@ def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
-def: InstRW<[SKXWriteResGroup154], (instregex "VPHADDSWYrm",
- "VPHSUBSWYrm")>;
-
-def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup156], (instrs IMUL32m, MUL32m, MULX32rm)>;
+def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm,
+ VPHSUBSWYrm)>;
def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 10;
@@ -1950,11 +1857,10 @@ def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PD(Z|Z256)rm(b?)",
- "VCVTDQ2PSYrm",
- "VCVTDQ2PS(Z|Z256)rm(b?)",
+def: InstRW<[SKXWriteResGroup161], (instrs VCVTDQ2PSYrm,
+ VCVTPS2PDYrm)>;
+def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
"VCVTPH2PS(Z|Z256)rm(b?)",
- "VCVTPS2PDYrm",
"VCVTPS2PD(Z|Z256)rm(b?)",
"VCVTQQ2PD(Z|Z256)rm(b?)",
"VCVTQQ2PSZ256rm(b?)",
@@ -1965,8 +1871,7 @@ def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PD(Z|Z256)rm(b?)",
"VCVT(T?)PS2QQZ256rm(b?)",
"VCVT(T?)PS2UDQ(Z|Z256)rm(b?)",
"VCVT(T?)PS2UQQZ256rm(b?)",
- "VCVTUDQ2PD(Z|Z256)rm(b?)",
- "VCVTUDQ2PS(Z|Z256)rm(b?)",
+ "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)",
"VCVTUQQ2PD(Z|Z256)rm(b?)",
"VCVTUQQ2PSZ256rm(b?)")>;
@@ -2000,9 +1905,11 @@ def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup166], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVT(T?)PD2PIirm")>;
+def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm)>;
def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 11;
@@ -2024,7 +1931,7 @@ def SKXWriteResGroup170 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort015
let NumMicroOps = 9;
let ResourceCycles = [1,5,1,2];
}
-def: InstRW<[SKXWriteResGroup170], (instregex "RCL8rCL")>;
+def: InstRW<[SKXWriteResGroup170], (instrs RCL8rCL)>;
def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 11;
@@ -2091,7 +1998,7 @@ def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup181], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[SKXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;
def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 13;
@@ -2151,7 +2058,7 @@ def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort015
let NumMicroOps = 10;
let ResourceCycles = [2,4,1,3];
}
-def: InstRW<[SKXWriteResGroup190], (instregex "RCR8rCL")>;
+def: InstRW<[SKXWriteResGroup190], (instrs RCR8rCL)>;
def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> {
let Latency = 15;
@@ -2181,10 +2088,10 @@ def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
}
def: InstRW<[SKXWriteResGroup199], (instrs CMPXCHG8B)>;
-def SKXWriteResGroup200 : SchedWriteRes<[SKXPort0156]> {
- let Latency = 16;
- let NumMicroOps = 16;
- let ResourceCycles = [16];
+def SKXWriteResGroup200 : SchedWriteRes<[SKXPort1, SKXPort05, SKXPort6]> {
+ let Latency = 12;
+ let NumMicroOps = 34;
+ let ResourceCycles = [1, 4, 5];
}
def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>;
@@ -2408,13 +2315,6 @@ def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01
def: InstRW<[SKXWriteResGroup240], (instrs VGATHERDPSZ256rm,
VPGATHERDDZ256rm)>;
-def SKXWriteResGroup241 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort0156]> {
- let Latency = 28;
- let NumMicroOps = 8;
- let ResourceCycles = [2,4,1,1];
-}
-def: InstRW<[SKXWriteResGroup241], (instregex "IDIV(8|16|32|64)m")>;
-
def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 29;
let NumMicroOps = 15;
@@ -2547,20 +2447,6 @@ def SKXWriteResGroup263 : SchedWriteRes<[SKXPort5,SKXPort05,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup263], (instrs FNINIT)>;
-def SKXWriteResGroup264 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
- let Latency = 76;
- let NumMicroOps = 32;
- let ResourceCycles = [7,2,8,3,1,11];
-}
-def: InstRW<[SKXWriteResGroup264], (instregex "DIV(16|32|64)r")>;
-
-def SKXWriteResGroup265 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> {
- let Latency = 102;
- let NumMicroOps = 66;
- let ResourceCycles = [4,2,4,8,14,34];
-}
-def: InstRW<[SKXWriteResGroup265], (instregex "IDIV(16|32|64)r")>;
-
def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86Schedule.td b/contrib/llvm/lib/Target/X86/X86Schedule.td
index ef9ce94706df..25aa83f96d3a 100644
--- a/contrib/llvm/lib/Target/X86/X86Schedule.td
+++ b/contrib/llvm/lib/Target/X86/X86Schedule.td
@@ -14,6 +14,9 @@
// but other register operands don't have to be read until the load is ready.
// These operands are marked with ReadAfterLd.
def ReadAfterLd : SchedRead;
+def ReadAfterVecLd : SchedRead;
+def ReadAfterVecXLd : SchedRead;
+def ReadAfterVecYLd : SchedRead;
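These three reads let vector instructions with a folded load advertise width-specific read-advance values. As a minimal sketch of how a per-CPU model consumes them (using the 3-cycle value the Atom model later in this patch assigns; every other model picks its own numbers):

def : ReadAdvance<ReadAfterVecLd,  3>; // scalar/MMX vector operands
def : ReadAdvance<ReadAfterVecXLd, 3>; // 128-bit (XMM) operands
def : ReadAdvance<ReadAfterVecYLd, 3>; // 256/512-bit (YMM/ZMM) operands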
// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
@@ -37,15 +40,19 @@ multiclass X86WriteRes<SchedWrite SchedRW,
class X86FoldableSchedWrite : SchedWrite {
// The SchedWrite to use when a load is folded into the instruction.
SchedWrite Folded;
+  // The SchedRead to tag register operands that don't need to be ready
+ // until the folded load has completed.
+ SchedRead ReadAfterFold;
}
// Multiclass that produces a linked pair of SchedWrites.
-multiclass X86SchedWritePair {
+multiclass X86SchedWritePair<SchedRead ReadAfter = ReadAfterLd> {
// Register-Memory operation.
def Ld : SchedWrite;
// Register-Register operation.
def NAME : X86FoldableSchedWrite {
let Folded = !cast<SchedWrite>(NAME#"Ld");
+ let ReadAfterFold = ReadAfter;
}
}
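Roughly speaking, an instantiation such as defm WriteFAddX : X86SchedWritePair<ReadAfterVecXLd>; (used later in this file) now expands to the pair sketched below, and omitting the template argument keeps the old ReadAfterLd behaviour:

def WriteFAddXLd : SchedWrite;
def WriteFAddX   : X86FoldableSchedWrite {
  let Folded        = WriteFAddXLd;    // folded-load variant
  let ReadAfterFold = ReadAfterVecXLd; // read for the non-memory operands
}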
@@ -107,19 +114,33 @@ def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove : SchedWrite;
+def WriteCopy : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
-def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
-def WriteADCRMW : WriteSequence<[WriteADCLd, WriteStore]>;
-defm WriteIMul : X86SchedWritePair; // Integer multiplication.
-defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
-def WriteIMulH : SchedWrite; // Integer multiplication, high part.
+def WriteALURMW : WriteSequence<[WriteALULd, WriteRMW]>;
+def WriteADCRMW : WriteSequence<[WriteADCLd, WriteRMW]>;
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+// Integer multiplication
+defm WriteIMul8 : X86SchedWritePair; // Integer 8-bit multiplication.
+defm WriteIMul16 : X86SchedWritePair; // Integer 16-bit multiplication.
+defm WriteIMul16Imm : X86SchedWritePair; // Integer 16-bit multiplication by immediate.
+defm WriteIMul16Reg : X86SchedWritePair; // Integer 16-bit multiplication by register.
+defm WriteIMul32 : X86SchedWritePair; // Integer 32-bit multiplication.
+defm WriteIMul32Imm : X86SchedWritePair; // Integer 32-bit multiplication by immediate.
+defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by register.
+defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
+defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
+defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
+def WriteIMulH : SchedWrite; // Integer multiplication, high part.
+
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
+defm WriteCMPXCHG : X86SchedWritePair; // Compare and set, compare and swap.
+def WriteCMPXCHGRMW : SchedWrite; // Compare and set, compare and swap.
+def WriteXCHG : SchedWrite; // Compare+Exchange - TODO RMW support.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
@@ -142,18 +163,32 @@ def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
-def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
+
+def WriteBitTest : SchedWrite; // Bit Test
+def WriteBitTestImmLd : SchedWrite;
+def WriteBitTestRegLd : SchedWrite;
+
+def WriteBitTestSet : SchedWrite; // Bit Test + Set
+def WriteBitTestSetImmLd : SchedWrite;
+def WriteBitTestSetRegLd : SchedWrite;
+def WriteBitTestSetImmRMW : WriteSequence<[WriteBitTestSetImmLd, WriteRMW]>;
+def WriteBitTestSetRegRMW : WriteSequence<[WriteBitTestSetRegLd, WriteRMW]>;
// Integer shifts and rotates.
-defm WriteShift : X86SchedWritePair;
+defm WriteShift : X86SchedWritePair;
+defm WriteShiftCL : X86SchedWritePair;
+defm WriteRotate : X86SchedWritePair;
+defm WriteRotateCL : X86SchedWritePair;
+
// Double shift instructions.
def WriteSHDrri : SchedWrite;
def WriteSHDrrcl : SchedWrite;
def WriteSHDmri : SchedWrite;
def WriteSHDmrcl : SchedWrite;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
+defm WriteBLS : X86SchedWritePair;
defm WriteBZHI : X86SchedWritePair;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
@@ -185,98 +220,98 @@ def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
-defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
-defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
-defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
-defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
-defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
-defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
-defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
-defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
-defm WriteFCmp : X86SchedWritePair; // Floating point compare.
-defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
-defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
-defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
-defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
-defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
-defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
-defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
-defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
-defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
-defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
-defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
-defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (YMM).
-defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
-defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
-defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
-defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
-defm WriteFDiv : X86SchedWritePair; // Floating point division.
-defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
-defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
-defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
-defm WriteFDiv64 : X86SchedWritePair; // Floating point double division.
-defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
-defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
-defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
-defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
-defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
-defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
-defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
-defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root.
-defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
-defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
-defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
-defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
-defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
-defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
-defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
-defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
-defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
-defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
-defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
-defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
-defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
-defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
-defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
-defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
-defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
-defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
-defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
-defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
-defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
-defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
-defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
-defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
-defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
-defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
-defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
-defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
-defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
-defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
-defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
-defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
-defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
-defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
-defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
-defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
-defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
-defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
-defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
-defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
-defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
-defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (YMZMM).
+defm WriteFAdd : X86SchedWritePair<ReadAfterVecLd>; // Floating point add/sub.
+defm WriteFAddX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point add/sub (XMM).
+defm WriteFAddY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (YMM).
+defm WriteFAddZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (ZMM).
+defm WriteFAdd64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double add/sub.
+defm WriteFAdd64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double add/sub (XMM).
+defm WriteFAdd64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (YMM).
+defm WriteFAdd64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (ZMM).
+defm WriteFCmp : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare.
+defm WriteFCmpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point compare (XMM).
+defm WriteFCmpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (YMM).
+defm WriteFCmpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (ZMM).
+defm WriteFCmp64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double compare.
+defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
+defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
+defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
+defm WriteFCom : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags.
+defm WriteFMul : X86SchedWritePair<ReadAfterVecLd>; // Floating point multiplication.
+defm WriteFMulX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
+defm WriteFMulY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).
+defm WriteFMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (ZMM).
+defm WriteFMul64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double multiplication.
+defm WriteFMul64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double multiplication (XMM).
+defm WriteFMul64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (YMM).
+defm WriteFMul64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (ZMM).
+defm WriteFDiv : X86SchedWritePair<ReadAfterVecLd>; // Floating point division.
+defm WriteFDivX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point division (XMM).
+defm WriteFDivY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (YMM).
+defm WriteFDivZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (ZMM).
+defm WriteFDiv64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double division.
+defm WriteFDiv64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double division (XMM).
+defm WriteFDiv64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (YMM).
+defm WriteFDiv64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (ZMM).
+defm WriteFSqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point square root.
+defm WriteFSqrtX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point square root (XMM).
+defm WriteFSqrtY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (YMM).
+defm WriteFSqrtZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (ZMM).
+defm WriteFSqrt64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double square root.
+defm WriteFSqrt64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double square root (XMM).
+defm WriteFSqrt64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (YMM).
+defm WriteFSqrt64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (ZMM).
+defm WriteFSqrt80 : X86SchedWritePair<ReadAfterVecLd>; // Floating point long double square root.
+defm WriteFRcp : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal estimate.
+defm WriteFRcpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal estimate (XMM).
+defm WriteFRcpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (YMM).
+defm WriteFRcpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (ZMM).
+defm WriteFRsqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal square root estimate.
+defm WriteFRsqrtX: X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal square root estimate (XMM).
+defm WriteFRsqrtY: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (YMM).
+defm WriteFRsqrtZ: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (ZMM).
+defm WriteFMA : X86SchedWritePair<ReadAfterVecLd>; // Fused Multiply Add.
+defm WriteFMAX : X86SchedWritePair<ReadAfterVecXLd>; // Fused Multiply Add (XMM).
+defm WriteFMAY : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (YMM).
+defm WriteFMAZ : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (ZMM).
+defm WriteDPPD : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double dot product.
+defm WriteDPPS : X86SchedWritePair<ReadAfterVecXLd>; // Floating point single dot product.
+defm WriteDPPSY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (YMM).
+defm WriteDPPSZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (ZMM).
+defm WriteFSign : X86SchedWritePair<ReadAfterVecLd>; // Floating point fabs/fchs.
+defm WriteFRnd : X86SchedWritePair<ReadAfterVecXLd>; // Floating point rounding.
+defm WriteFRndY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (YMM).
+defm WriteFRndZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (ZMM).
+defm WriteFLogic : X86SchedWritePair<ReadAfterVecXLd>; // Floating point and/or/xor logicals.
+defm WriteFLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (YMM).
+defm WriteFLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (ZMM).
+defm WriteFTest : X86SchedWritePair<ReadAfterVecXLd>; // Floating point TEST instructions.
+defm WriteFTestY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (YMM).
+defm WriteFTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (ZMM).
+defm WriteFShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector shuffles.
+defm WriteFShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (YMM).
+defm WriteFShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (ZMM).
+defm WriteFVarShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector variable shuffles.
+defm WriteFVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (YMM).
+defm WriteFVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (ZMM).
+defm WriteFBlend : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector blends.
+defm WriteFBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (YMM).
+defm WriteFBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (ZMM).
+defm WriteFVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Fp vector variable blends.
+defm WriteFVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (YMM).
+defm WriteFVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Horizontal Add/Sub (float and integer)
-defm WriteFHAdd : X86SchedWritePair;
-defm WriteFHAddY : X86SchedWritePair;
-defm WriteFHAddZ : X86SchedWritePair;
-defm WritePHAdd : X86SchedWritePair;
-defm WritePHAddX : X86SchedWritePair;
-defm WritePHAddY : X86SchedWritePair;
-defm WritePHAddZ : X86SchedWritePair;
+defm WriteFHAdd : X86SchedWritePair<ReadAfterVecXLd>;
+defm WriteFHAddY : X86SchedWritePair<ReadAfterVecYLd>;
+defm WriteFHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
+defm WritePHAdd : X86SchedWritePair<ReadAfterVecLd>;
+defm WritePHAddX : X86SchedWritePair<ReadAfterVecXLd>;
+defm WritePHAddY : X86SchedWritePair<ReadAfterVecYLd>;
+defm WritePHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
// Vector integer operations.
def WriteVecLoad : SchedWrite;
@@ -299,54 +334,54 @@ def WriteVecMoveY : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;
-defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
-defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
-defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
-defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
-defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
-defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
-defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
-defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
-defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
-defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
-defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
-defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
-defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
-defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
-defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
-defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
-defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
-defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
-defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
-defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
-defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
-defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
-defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
-defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
-defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
-defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
-defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
-defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
-defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
-defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
-defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
-defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
-defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
-defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
-defm WriteBlend : X86SchedWritePair; // Vector blends.
-defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
-defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
-defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
-defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
-defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
-defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
-defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
-defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
-defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
-defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
-defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
-defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
-defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
+defm WriteVecALU : X86SchedWritePair<ReadAfterVecLd>; // Vector integer ALU op, no logicals.
+defm WriteVecALUX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer ALU op, no logicals (XMM).
+defm WriteVecALUY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (YMM).
+defm WriteVecALUZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (ZMM).
+defm WriteVecLogic : X86SchedWritePair<ReadAfterVecLd>; // Vector integer and/or/xor logicals.
+defm WriteVecLogicX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer and/or/xor logicals (XMM).
+defm WriteVecLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (YMM).
+defm WriteVecLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (ZMM).
+defm WriteVecTest : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer TEST instructions.
+defm WriteVecTestY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (YMM).
+defm WriteVecTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (ZMM).
+defm WriteVecShift : X86SchedWritePair<ReadAfterVecLd>; // Vector integer shifts (default).
+defm WriteVecShiftX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer shifts (XMM).
+defm WriteVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (YMM).
+defm WriteVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (ZMM).
+defm WriteVecShiftImm : X86SchedWritePair<ReadAfterVecLd>; // Vector integer immediate shifts (default).
+defm WriteVecShiftImmX: X86SchedWritePair<ReadAfterVecXLd>; // Vector integer immediate shifts (XMM).
+defm WriteVecShiftImmY: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (YMM).
+defm WriteVecShiftImmZ: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (ZMM).
+defm WriteVecIMul : X86SchedWritePair<ReadAfterVecLd>; // Vector integer multiply (default).
+defm WriteVecIMulX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer multiply (XMM).
+defm WriteVecIMulY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (YMM).
+defm WriteVecIMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (ZMM).
+defm WritePMULLD : X86SchedWritePair<ReadAfterVecXLd>; // Vector PMULLD.
+defm WritePMULLDY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (YMM).
+defm WritePMULLDZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (ZMM).
+defm WriteShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector shuffles.
+defm WriteShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector shuffles (XMM).
+defm WriteShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (YMM).
+defm WriteShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (ZMM).
+defm WriteVarShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector variable shuffles.
+defm WriteVarShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable shuffles (XMM).
+defm WriteVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (YMM).
+defm WriteVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (ZMM).
+defm WriteBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector blends.
+defm WriteBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (YMM).
+defm WriteBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (ZMM).
+defm WriteVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable blends.
+defm WriteVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (YMM).
+defm WriteVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (ZMM).
+defm WritePSADBW : X86SchedWritePair<ReadAfterVecLd>; // Vector PSADBW.
+defm WritePSADBWX : X86SchedWritePair<ReadAfterVecXLd>; // Vector PSADBW (XMM).
+defm WritePSADBWY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (YMM).
+defm WritePSADBWZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (ZMM).
+defm WriteMPSAD : X86SchedWritePair<ReadAfterVecXLd>; // Vector MPSAD.
+defm WriteMPSADY : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (YMM).
+defm WriteMPSADZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (ZMM).
+defm WritePHMINPOS : X86SchedWritePair<ReadAfterVecXLd>; // Vector PHMINPOS.
// Vector insert/extract operations.
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
@@ -360,39 +395,39 @@ def WriteVecMOVMSKY : SchedWrite;
def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
-defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
-defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
-defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
-defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).
-
-defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
-defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
-defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
-defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).
-
-defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
-defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
-defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
-defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).
-
-defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
-defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
-defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
-defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).
-
-defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
-defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
-defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
-defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).
-
-defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
-defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
-defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
-defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).
-
-defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
-defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
-defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).
+defm WriteCvtSD2I : X86SchedWritePair<ReadAfterVecLd>; // Double -> Integer.
+defm WriteCvtPD2I : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Integer (XMM).
+defm WriteCvtPD2IY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (YMM).
+defm WriteCvtPD2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (ZMM).
+
+defm WriteCvtSS2I : X86SchedWritePair<ReadAfterVecLd>; // Float -> Integer.
+defm WriteCvtPS2I : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Integer (XMM).
+defm WriteCvtPS2IY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (YMM).
+defm WriteCvtPS2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (ZMM).
+
+defm WriteCvtI2SD : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Double.
+defm WriteCvtI2PD : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Double (XMM).
+defm WriteCvtI2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (YMM).
+defm WriteCvtI2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (ZMM).
+
+defm WriteCvtI2SS : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Float.
+defm WriteCvtI2PS : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Float (XMM).
+defm WriteCvtI2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (YMM).
+defm WriteCvtI2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (ZMM).
+
+defm WriteCvtSS2SD : X86SchedWritePair<ReadAfterVecLd>; // Float -> Double size conversion.
+defm WriteCvtPS2PD : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Double size conversion (XMM).
+defm WriteCvtPS2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (YMM).
+defm WriteCvtPS2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (ZMM).
+
+defm WriteCvtSD2SS : X86SchedWritePair<ReadAfterVecLd>; // Double -> Float size conversion.
+defm WriteCvtPD2PS : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Float size conversion (XMM).
+defm WriteCvtPD2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (YMM).
+defm WriteCvtPD2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (ZMM).
+
+defm WriteCvtPH2PS : X86SchedWritePair<ReadAfterVecXLd>; // Half -> Float size conversion.
+defm WriteCvtPH2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (YMM).
+defm WriteCvtPH2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (ZMM).
def WriteCvtPS2PH : SchedWrite; // // Float -> Half size conversion.
def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM).
@@ -402,25 +437,25 @@ def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion
def WriteCvtPS2PHZSt : SchedWrite; // // Float -> Half + store size conversion (ZMM).
// CRC32 instruction.
-defm WriteCRC32 : X86SchedWritePair;
+defm WriteCRC32 : X86SchedWritePair<ReadAfterLd>;
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
-defm WritePCmpIStrM : X86SchedWritePair;
+defm WritePCmpIStrM : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Explicit Length Strings, Return Mask
-defm WritePCmpEStrM : X86SchedWritePair;
+defm WritePCmpEStrM : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Implicit Length Strings, Return Index
-defm WritePCmpIStrI : X86SchedWritePair;
+defm WritePCmpIStrI : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Explicit Length Strings, Return Index
-defm WritePCmpEStrI : X86SchedWritePair;
+defm WritePCmpEStrI : X86SchedWritePair<ReadAfterVecXLd>;
// AES instructions.
-defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
-defm WriteAESIMC : X86SchedWritePair; // InvMixColumn.
-defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.
+defm WriteAESDecEnc : X86SchedWritePair<ReadAfterVecXLd>; // Decryption, encryption.
+defm WriteAESIMC : X86SchedWritePair<ReadAfterVecXLd>; // InvMixColumn.
+defm WriteAESKeyGen : X86SchedWritePair<ReadAfterVecXLd>; // Key Generation.
// Carry-less multiplication instructions.
-defm WriteCLMul : X86SchedWritePair;
+defm WriteCLMul : X86SchedWritePair<ReadAfterVecXLd>;
// EMMS/FEMMS
def WriteEMMS : SchedWrite;
@@ -433,13 +468,13 @@ def WriteSTMXCSR : SchedWrite;
def WriteSystem : SchedWrite;
// AVX2.
-defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
-defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
-defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
-defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
-defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
-defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
-defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).
+defm WriteFShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width vector shuffles.
+defm WriteFVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width variable shuffles.
+defm WriteShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector shuffles.
+defm WriteVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector variable shuffles.
+defm WriteVarVecShift : X86SchedWritePair<ReadAfterVecXLd>; // Variable vector shifts.
+defm WriteVarVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (YMM).
+defm WriteVarVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (ZMM).
// Old microcoded instructions that nobody use.
def WriteMicrocoded : SchedWrite;
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
index a7f461c456bd..1589ff2ef402 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -43,6 +43,9 @@ def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+def : ReadAdvance<ReadAfterVecLd, 3>;
+def : ReadAdvance<ReadAfterVecXLd, 3>;
+def : ReadAdvance<ReadAfterVecYLd, 3>;
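Put concretely (this is just a reading of the comment above, not new data from the patch): a register consumed through one of these reads sees a producer of latency L as stalling it for only max(0, L - 3) cycles, so a 5-cycle producer costs 2 visible cycles and any producer with latency of 3 or less is effectively free.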
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
@@ -78,11 +81,24 @@ def : WriteRes<WriteRMW, [AtomPort0]>;
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
-defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
-defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : AtomWriteResPair<WriteIMul8, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
+defm : AtomWriteResPair<WriteIMul16, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WriteIMul16Imm, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul16Reg, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul32, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul32Imm, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteIMul32Reg, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>;
+defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : X86WriteResUnsupported<WriteIMulH>;
+
+defm : X86WriteRes<WriteXCHG, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
+defm : AtomWriteResPair<WriteCMPXCHG, [AtomPort01], [AtomPort01], 15, 15, [15]>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [AtomPort01, AtomPort0], 1, [1, 1], 1>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
@@ -108,32 +124,16 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
-def : WriteRes<WriteBitTest,[AtomPort01]>;
-
-defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteRes<WriteBitTest, [AtomPort1], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [AtomPort0], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [AtomPort01], 9, [9], 1>;
+defm : X86WriteRes<WriteBitTestSet, [AtomPort1], 1, [1], 1>;
+//defm : X86WriteRes<WriteBitTestSetImmLd, [AtomPort1], 1, [1], 1>;
+//defm : X86WriteRes<WriteBitTestSetRegLd, [AtomPort1], 1, [1], 1>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [AtomPort1]>;
-def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
- let Latency = 8;
- let ResourceCycles = [8];
-}
-def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
-
-def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
- let Latency = 6;
- let ResourceCycles = [6];
-}
-def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
-
-def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
- let Latency = 14;
- let ResourceCycles = [14];
-}
-def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
- IMUL64rmi8, IMUL64rmi32)>;
-
// Bit counts.
defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
@@ -141,15 +141,19 @@ defm : X86WriteResPairUnsupported<WritePOPCNT>;
defm : X86WriteResPairUnsupported<WriteLZCNT>;
defm : X86WriteResPairUnsupported<WriteTZCNT>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
+defm : X86WriteResPairUnsupported<WriteBLS>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
-defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteShiftCL, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteRotate, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteRotateCL, [AtomPort0], [AtomPort0]>;
defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
@@ -498,20 +502,13 @@ def : SchedAlias<WriteADCRMW, AtomWrite0_1>;
def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
"MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>;
-def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
- let Latency = 5;
- let ResourceCycles = [5];
-}
-def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>;
-
// Port1
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
-def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r",
- "BT(C|R|S)?(16|32|64)(rr|ri8)")>;
+def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r")>;
def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
let Latency = 5;
@@ -563,16 +560,14 @@ def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
LODSB, LODSL, LODSQ, LODSW,
SCASB, SCASL, SCASQ, SCASW)>;
-def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
- "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
- "XADD(8|16|32|64)rr",
- "XCHG(8|16|32|64)(ar|rr)",
+def : InstRW<[AtomWrite01_2], (instregex "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
"MMX_P(ADD|SUB)Qirr",
"MOV(S|Z)X16rr8",
"MOV(UPS|UPD|DQU)mr",
"MASKMOVDQU(64)?",
"P(ADD|SUB)Qrr")>;
+def : SchedAlias<WriteBitTestSetImmRMW, AtomWrite01_2>;
def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
let Latency = 3;
@@ -616,8 +611,7 @@ def : InstRW<[AtomWrite01_6], (instrs CMPXCHG8rm, INTO, XLAT,
SHLD16rri8, SHRD16rri8,
SHLD16mrCL, SHRD16mrCL,
SHLD16mri8, SHRD16mri8)>;
-def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
- "IST_F(P)?(16|32|64)?m",
+def : InstRW<[AtomWrite01_6], (instregex "IST_F(P)?(16|32|64)?m",
"MMX_PH(ADD|SUB)S?Wrm")>;
def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
@@ -639,8 +633,7 @@ def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
let Latency = 9;
let ResourceCycles = [9];
}
-def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
- POPA16, POPA32,
+def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
PUSHF16, PUSHF32, PUSHF64,
SHLD64mrCL, SHRD64mrCL,
SHLD64mri8, SHRD64mri8,
@@ -663,7 +656,7 @@ def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let ResourceCycles = [11];
}
def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
-def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;
+def : SchedAlias<WriteBitTestSetRegRMW, AtomWrite01_11>;
def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
let Latency = 13;
@@ -677,12 +670,6 @@ def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
}
def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
-def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
- let Latency = 15;
- let ResourceCycles = [15];
-}
-def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr)>;
-
def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
let Latency = 17;
let ResourceCycles = [17];
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td
new file mode 100644
index 000000000000..5798e1b2671b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -0,0 +1,1282 @@
+//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for AMD bdver2 (Piledriver) to support
+// instruction scheduling and other instruction cost heuristics.
+// Based on:
+// * AMD Software Optimization Guide for AMD Family 15h Processors.
+// https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
+// * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
+// http://www.agner.org/optimize/microarchitecture.pdf
+// * https://www.realworldtech.com/bulldozer/
+// Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
+//
+//===----------------------------------------------------------------------===//
+
+def BdVer2Model : SchedMachineModel {
+ let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired.
+ let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed.
+ let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
+ let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency.
+ let HighLatency = 25; // FIXME: any better choice?
+ let MispredictPenalty = 20; // Minimum branch misdirection penalty.
+
+ let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
+
+ // FIXME: Incomplete. This flag is set to allow the scheduler to assign
+ // a default model to unrecognized opcodes.
+ let CompleteModel = 0;
+} // SchedMachineModel
+
+let SchedModel = BdVer2Model in {
+
+
+//===----------------------------------------------------------------------===//
+// Pipes
+//===----------------------------------------------------------------------===//
+
+// There are a total of eight pipes.
+
+//===----------------------------------------------------------------------===//
+// Integer execution pipes
+//
+
+// Two EX (ALU) pipes.
+def PdEX0 : ProcResource<1>; // ALU, Integer Pipe0
+def PdEX1 : ProcResource<1>; // ALU, Integer Pipe1
+def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>;
+
+// Two AGLU pipes, identical.
+def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23]
+
+//===----------------------------------------------------------------------===//
+// Floating point execution pipes
+//
+
+// Four FPU pipes.
+
+def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0
+def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1
+def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2
+def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3
+
+// FPU grouping
+def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
+def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;
+
+
+//===----------------------------------------------------------------------===//
+// RCU
+//===----------------------------------------------------------------------===//
+
+// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
+// On the other hand, the RCU reorder buffer size for Piledriver does not
+// seem to be specified in any trustworthy source.
+// But as per https://www.realworldtech.com/bulldozer/6/, Bulldozer had an
+// RCU reorder buffer size of 128, so that is a good guess for now.
+def PdRCU : RetireControlUnit<128, 4>;
+
+
+//===----------------------------------------------------------------------===//
+// Pipelines
+//===----------------------------------------------------------------------===//
+
+// There are a total of two pipelines, each with its own scheduler.
+
+//===----------------------------------------------------------------------===//
+// Integer Pipeline Scheduling
+//
+
+// There is one Integer Scheduler per core.
+
+// The integer physical register file has 96 64-bit registers.
+def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;
+
+// The unified Integer/Memory scheduler has 40 entries.
+def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
+ // Up to 4 IPC can be decoded, issued, retired.
+ let BufferSize = 40;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FPU Pipeline Scheduling
+//
+
+// The FPU unit is shared between the two cores.
+
+// The FP physical register file has 160 128-bit registers.
+// Operations on 256-bit data types are cracked into two COPs.
+def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
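+// Note: the [1, 1, 2] cost list means a VR64 or VR128 definition consumes one
+// physical register, while a VR256 definition consumes two, matching the
+// cracking of 256-bit operations into two COPs.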
+
+// The unified FP scheduler has 64 entries.
+def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
+ // Up to 4 IPC can be decoded, issued, retired.
+ let BufferSize = 64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Functional units
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Load-Store Units
+//
+
+let Super = PdAGLU01 in
+def PdLoad : ProcResource<2> {
+ // For Piledriver, the load queue is 40 entries deep.
+ let BufferSize = 40;
+}
+
+def PdLoadQueue : LoadQueue<PdLoad>;
+
+let Super = PdAGLU01 in
+def PdStore : ProcResource<1> {
+ // For Piledriver, the store queue is 24 entries deep.
+ let BufferSize = 24;
+}
+
+def PdStoreQueue : StoreQueue<PdStore>;
+
+//===----------------------------------------------------------------------===//
+// Integer Execution Units
+//
+
+def PdDiv : ProcResource<1>; // PdEX0; unpipelined integer division
+def PdCount : ProcResource<1>; // PdEX0; POPCNT, LZCNT
+
+def PdMul : ProcResource<1>; // PdEX1; integer multiplication
+def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches
+
+//===----------------------------------------------------------------------===//
+// Floating-Point Units
+//
+
+// Two FMAC/FPFMA units.
+def PdFPFMA : ProcResource<2>; // PdFPU0, PdFPU1
+
+// One 128-bit integer multiply-accumulate unit.
+def PdFPMMA : ProcResource<1>; // PdFPU0
+
+// One fp conversion unit.
+def PdFPCVT : ProcResource<1>; // PdFPU0
+
+// One unit for shuffles, packs, permutes, shifts.
+def PdFPXBR : ProcResource<1>; // PdFPU1
+
+// Two 128-bit packed integer units.
+def PdFPMAL : ProcResource<2>; // PdFPU2, PdFPU3
+
+// One FP store unit.
+def PdFPSTO : ProcResource<1>; // PdFPU3
+
+
+//===----------------------------------------------------------------------===//
+// Basic helper classes.
+//===----------------------------------------------------------------------===//
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when dispatched by the schedulers.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass PdWriteRes<SchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1> {
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+}
+
+multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat,
+ list<int> Res, int UOps,
+ int LoadLat, int LoadRes, int LoadUOps> {
+ defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
+
+ defm : PdWriteRes<SchedRW.Folded,
+ !listconcat([PdLoad], ExePorts),
+ !add(Lat, LoadLat),
+ !if(!and(!empty(Res), !eq(LoadRes, 1)),
+ [],
+ !listconcat([LoadRes], Res)),
+ !add(UOps, LoadUOps)>;
+}
+
+multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
+ defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ /*LoadLat*/4, /*LoadRes*/1, LoadUOps>;
+}
+
+multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
+ defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ /*LoadLat*/5, /*LoadRes*/1, LoadUOps>;
+}
+
+multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat,
+ list<int> Res, int UOps = 2,
+ int LoadUOps = 0> {
+ defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ /*LoadLat*/5, /*LoadRes*/2, LoadUOps>;
+}
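+
+// As an illustration of how these wrappers expand (a sketch for readability,
+// not additional model definitions): PdWriteResExPair<WriteALU, [PdEX01]> with
+// the default arguments is equivalent to defining
+//
+//   def : WriteRes<WriteALU, [PdEX01]> {
+//     let Latency = 1;
+//     let ResourceCycles = [];
+//     let NumMicroOps = 1;
+//   }
+//   def : WriteRes<WriteALULd, [PdLoad, PdEX01]> { // WriteALU.Folded
+//     let Latency = 5;         // Lat 1 + LoadLat 4.
+//     let ResourceCycles = []; // Stays empty when Res is empty and LoadRes is 1.
+//     let NumMicroOps = 1;     // UOps 1 + LoadUOps 0.
+//   }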
+
+//===----------------------------------------------------------------------===//
+// Here be dragons.
+//===----------------------------------------------------------------------===//
+
+// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers
+// needn't be available until 4 cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 5>;
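+
+// Put differently (illustrative only): for a reg-mem ALU operation the address
+// registers are needed right away, but the register source read through
+// ReadAfterLd only feeds the execute stage after the 4-cycle load, so its
+// producer may complete up to 4 cycles after the load issues without stalling.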
+
+// A folded store needs a cycle on the PdStore for the store data.
+def : WriteRes<WriteRMW, [PdStore]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads, stores, and moves, not folded with other operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; }
+def : WriteRes<WriteStore, [PdStore]>;
+def : WriteRes<WriteStoreNT, [PdStore]>;
+def : WriteRes<WriteMove, [PdEX01]>;
+
+// Load/store MXCSR.
+// FIXME: These are copy and pasted from WriteLoad/Store.
+def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; }
+def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; }
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteZero, [/*No ExePorts*/]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Special case scheduling classes.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteSystem, [PdEX01]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [PdEX01]> { let Latency = 100; }
+def : WriteRes<WriteFence, [PdStore]>;
+
+def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+}
+def : InstRW<[PdWriteXLAT], (instrs XLAT)>;
+
+def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
+ let Latency = 184;
+ let NumMicroOps = 45;
+}
+def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
+ "LSL(16|32|64)rr")>;
+
+// Nops don't have dependencies, so there's no actual latency, but we set this
+// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
+def : WriteRes<WriteNop, [PdEX01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Arithmetic.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResExPair<WriteALU, [PdEX01]>;
+
+def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;
+
+def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteBMI1],
+ (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
+ BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
+ BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
+ BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
+ TZMSK32rr, TZMSK64rr)>;
+
+def PdWriteBMI1m : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteBMI1m],
+ (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
+ BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
+ BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
+ BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
+ TZMSK32rm, TZMSK64rm)>;
+
+defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;
+
+defm : PdWriteRes<WriteBSWAP32, [PdEX1]>;
+defm : PdWriteRes<WriteBSWAP64, [PdEX1]>;
+defm : PdWriteRes<WriteCMPXCHG, [PdEX1], 3, [], 5>;
+defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [], 2>;
+defm : PdWriteRes<WriteXCHG, [PdEX1], 1, [], 2>;
+
+def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
+
+def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 5;
+}
+def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;
+
+def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 6;
+}
+def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
+ (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
+
+def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 18;
+}
+def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
+
+def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 22;
+}
+def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;
+
+def PdWriteXCHG16rr : SchedWriteRes<[PdEX1]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteXCHG16rr], (instrs XCHG16rr)>;
+
+def PdWriteXADD : SchedWriteRes<[PdEX1]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;
+
+def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;
+
+defm : PdWriteResExPair<WriteIMul8, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul16, [PdEX1, PdMul], 4, [], 2>;
+defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [], 2>;
+defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul32, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [], 1, 1>;
+defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 4]>;
+defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4], 1, 1>;
+defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
+defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX
+
+defm : PdWriteResExPair<WriteDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
+defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
+defm : PdWriteResExPair<WriteDiv32, [PdEX1, PdDiv], 14, [1, 14], 2>;
+defm : PdWriteResExPair<WriteDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
+
+defm : PdWriteResExPair<WriteIDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
+defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17], 2>;
+defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
+defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
+
+defm : PdWriteResExPair<WriteCRC32, [PdEX01], 3, [4], 3>;
+
+def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
+ let Latency = 5;
+ let ResourceCycles = [4];
+ let NumMicroOps = 5;
+}
+def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;
+
+def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+ let ResourceCycles = [4];
+ let NumMicroOps = 7;
+}
+def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;
+
+def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
+ let Latency = 10;
+ let ResourceCycles = [4];
+ let NumMicroOps = 11;
+}
+def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;
+
+defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.
+defm : PdWriteResExPair<WriteCMOV2, [PdEX01], 1, [], 1, 1>; // Conditional (CF + ZF flag) move.
+
+def : InstRW<[WriteCMOV2.Folded], (instrs CMOVG16rm, CMOVG32rm, CMOVG64rm,
+ CMOVGE16rm, CMOVGE32rm, CMOVGE64rm,
+ CMOVL16rm, CMOVL32rm, CMOVL64rm,
+ CMOVLE16rm, CMOVLE32rm, CMOVLE64rm)>;
+
+defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.
+
+def : WriteRes<WriteSETCC, [PdEX01]>; // Setcc.
+def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;
+
+def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteSETGEmSETGmSETLEmSETLm], (instrs SETGEm, SETGm,
+ SETLEm, SETLm)>;
+
+defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [], 2>;
+
+def WriteLAHF : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteLAHF], (instrs LAHF)>;
+
+def WriteSAHF : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteSAHF], (instrs SAHF)>;
+
+defm : PdWriteRes<WriteBitTest, [PdEX01], 1, [1], 1>;
+defm : PdWriteRes<WriteBitTestImmLd, [PdEX01, PdLoad], 5, [1, 1], 1>;
+defm : PdWriteRes<WriteBitTestRegLd, [PdEX01, PdLoad], 5, [1, 1], 7>;
+defm : PdWriteRes<WriteBitTestSet, [PdEX01], 2, [1], 2>;
+defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
+defm : PdWriteRes<WriteBitTestSetImmRMW, [PdEX01, PdLoad], 6, [1, 1], 4>;
+defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;
+defm : PdWriteRes<WriteBitTestSetRegRMW, [PdEX01, PdLoad], 6, [1, 1], 10>;
+
+// This is for simple LEAs with one or two input operands.
+// FIXME: SAGU 3-operand LEA
+def : WriteRes<WriteLEA, [PdEX01]> { let NumMicroOps = 2; }
+
+// Bit counts.
+defm : PdWriteResExPair<WriteBSF, [PdEX01], 3, [4], 6, 2>;
+defm : PdWriteResExPair<WriteBSR, [PdEX01], 4, [4], 7, 2>;
+defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4>;
+defm : PdWriteResExPair<WriteLZCNT, [PdEX01], 2, [], 2>;
+defm : PdWriteResExPair<WriteTZCNT, [PdEX01], 2, [2], 2>;
+
+// BMI1 BEXTR, BMI2 BZHI
+defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [], 2>;
+defm : PdWriteResExPair<WriteBLS, [PdEX01], 2, [], 2>;
+defm : PdWriteResExPair<WriteBZHI, [PdEX01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer shifts and rotates.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResExPair<WriteShift, [PdEX01]>;
+defm : PdWriteResExPair<WriteShiftCL, [PdEX01]>;
+defm : PdWriteResExPair<WriteRotate, [PdEX01]>;
+defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;
+
+def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 12;
+ let NumMicroOps = 26;
+}
+def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;
+
+def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 12;
+ let NumMicroOps = 23;
+}
+def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;
+
+def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 11;
+ let NumMicroOps = 24;
+}
+def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;
+
+def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 10;
+ let NumMicroOps = 22;
+}
+def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;
+
+def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 10;
+ let NumMicroOps = 19;
+}
+def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;
+
+def PdWriteRCL32rCLRCL64rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 17;
+}
+def : InstRW<[PdWriteRCL32rCLRCL64rCL], (instrs RCL32rCL, RCL64rCL)>;
+
+def PdWriteRCR64rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteRCR64rCL], (instrs RCR64rCL)>;
+
+def PdWriteRCR32rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteRCR32rCL], (instrs RCR32rCL)>;
+
+def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 15;
+}
+def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
+
+
+def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 9;
+ let NumMicroOps = 20;
+}
+def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;
+
+def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 11;
+ let NumMicroOps = 21;
+}
+def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;
+
+def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 8;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;
+
+def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 13;
+ let NumMicroOps = 25;
+}
+def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;
+
+// SHLD/SHRD.
+defm : PdWriteRes<WriteSHDrri, [PdEX01], 4, [6], 6>;
+defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 4, [8], 7>;
+
+def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
+ let Latency = 3;
+ let ResourceCycles = [6];
+ let NumMicroOps = 6;
+}
+def : InstRW<[PdWriteSHLD32rri8SHRD16rri8], (instrs SHLD32rri8, SHRD16rri8)>;
+
+def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 4;
+ let ResourceCycles = [8];
+ let NumMicroOps = 7;
+}
+def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
+ SHLD32rrCL,
+ SHRD32rrCL)>;
+
+defm : PdWriteRes<WriteSHDmri, [PdLoad, PdEX01], 4, [1, 22], 8>;
+defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Floating point. This covers both scalar and vector operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;
+
+defm : PdWriteRes<WriteFLoad, [PdLoad, PdFPU01, PdFPFMA], 5>;
+defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5>;
+defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [], 2>;
+
+defm : PdWriteRes<WriteFMaskedLoad, [PdLoad, PdFPU01, PdFPFMA], 6, [1, 1, 2]>;
+defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [2, 2, 4], 2>;
+
+defm : PdWriteRes<WriteFStore, [PdStore, PdFPU1, PdFPSTO], 2>;
+defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU1, PdFPSTO]>;
+defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU1, PdFPSTO], 1, [], 4>;
+
+def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;
+
+def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;
+
+defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;
+
+defm : PdWriteRes<WriteFMaskedStore, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 4], 18>;
+defm : PdWriteRes<WriteFMaskedStoreY, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 4], 34>;
+
+defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>;
+defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA]>;
+defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+
+defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;
+
+defm : PdWriteResXMMPair<WriteFAdd, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
+
+defm : PdWriteResXMMPair<WriteFAdd64, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
+
+defm : PdWriteResXMMPair<WriteFCmp, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
+
+defm : PdWriteResXMMPair<WriteFCmp64, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
+
+defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+
+def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
+ let Latency = 6;
+}
+def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;
+
+def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
+def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;
+
+defm : PdWriteResXMMPair<WriteFMul, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFMulZ>;
+
+defm : PdWriteResXMMPair<WriteFMul64, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFMul64Z>;
+
+defm : PdWriteResXMMPair<WriteFMA, [PdFPU, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 1]>;
+defm : X86WriteResPairUnsupported<WriteFMAZ>;
+
+
+defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 3], 15, 2>;
+
+defm : PdWriteResXMMPair<WriteDPPS, [PdFPU1, PdFPFMA], 25, [1, 3], 16, 2>;
+defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 6], /*or 29*/ 25, 4>;
+defm : X86WriteResPairUnsupported<WriteDPPSZ>;
+
+def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 3];
+ let NumMicroOps = 17;
+}
+def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;
+
+defm : PdWriteResXMMPair<WriteFRcp, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
+
+defm : PdWriteResXMMPair<WriteFRsqrt, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
+
+defm : PdWriteResXMMPair<WriteFDiv, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 38]>;
+defm : X86WriteResPairUnsupported<WriteFDivZ>;
+
+defm : PdWriteResXMMPair<WriteFDiv64, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 38]>;
+defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
+
+defm : PdWriteResXMMPair<WriteFSqrt, [PdFPU1, PdFPFMA], 9, [1, 21]>;
+defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 21]>;
+defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 42]>;
+defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
+
+defm : PdWriteResXMMPair<WriteFSqrt64, [PdFPU1, PdFPFMA], 9, [1, 27]>;
+defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 27]>;
+defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 54]>;
+defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
+
+defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 35]>;
+defm : PdWriteResXMMPair<WriteFSign, [PdFPU1, PdFPFMA]>;
+
+defm : PdWriteResXMMPair<WriteFRnd, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
+defm : X86WriteResPairUnsupported<WriteFRndZ>;
+
+def PdWriteVFRCZ : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVFRCZ], (instrs VFRCZPDrr, VFRCZPSrr,
+ VFRCZSDrr, VFRCZSSrr)>;
+
+def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
+ VFRCZSDrm, VFRCZSSrm)>;
+
+def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 10;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;
+
+def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 15;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;
+
+defm : PdWriteResXMMPair<WriteFLogic, [PdFPU01, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU01, PdFPFMA], 2, [2, 2]>;
+defm : X86WriteResPairUnsupported<WriteFLogicZ>;
+
+defm : PdWriteResXMMPair<WriteFTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 2, 1], 4, 2>;
+defm : X86WriteResPairUnsupported<WriteFTestZ>;
+
+defm : PdWriteResXMMPair<WriteFShuffle, [PdFPU01, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
+
+def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;
+
+defm : PdWriteResXMMPair<WriteFVarShuffle, [PdFPU01, PdFPFMA], 3, [1, 4]>;
+defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 6], 2>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
+
+defm : PdWriteResXMMPair<WriteFBlend, [PdFPU01, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+defm : X86WriteResPairUnsupported<WriteFBlendZ>;
+
+defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU01, PdFPFMA], 2, [1, 4]>;
+defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 6], 2>;
+defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
+
+defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [], 2>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
+
+def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 2;
+}
+def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;
+
+def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;
+
+def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 4;
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;
+
+def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 8; // 4 + 4
+ let NumMicroOps = 10;
+}
+def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Conversions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU1, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
+
+defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU1, PdFPSTO], 4, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
+
+defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU1, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
+
+defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
+
+def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>;
+
+// FIXME: f+3 ST, LD+STC latency
+defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU1, PdFPSTO], 4, [], 2>;
+// FIXME: the .Folded version has one NumMicroOp *fewer*.
+
+defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU1, PdFPSTO], 4, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
+
+defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU1, PdFPSTO], 4, [], 2>;
+// FIXME: the .Folded version has one NumMicroOp *fewer*.
+
+def WriteCVTSI642SDrr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 13;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteCVTSI642SDrr], (instrs CVTSI642SDrr, CVTSI642SSrr)>;
+
+defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
+
+defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU1, PdFPSTO], 4>;
+
+defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
+
+defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU1, PdFPSTO], 4>;
+
+defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
+
+def WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
+ MMX_CVTPI2PDirr)>;
+
+def WriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;
+
+defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU1, PdFPSTO], 8, [], 2, 1>;
+defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
+
+defm : PdWriteRes<WriteCvtPS2PH, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
+
+defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU1, PdFPSTO, PdStore], 4, [], 3>;
+defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU1, PdFPSTO, PdFPFMA, PdStore], 4, [2, 1, 1, 1], 4>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector integer operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [], 2>;
+
+defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5>;
+
+defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [1, 1, 2]>;
+defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
+
+defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU1, PdFPSTO], 2>;
+defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU1, PdFPSTO]>;
+defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU1, PdFPSTO], 1, [], 4>;
+
+def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;
+
+defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
+defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;
+
+defm : PdWriteRes<WriteVecMaskedStore, [PdStore, PdFPU01, PdFPMAL], 6, [1, 1, 4]>;
+defm : PdWriteRes<WriteVecMaskedStoreY, [PdStore, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
+
+defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
+
+defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 10>;
+defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 10, [], 2>;
+
+defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteVecALUY>;
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
+
+defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3>;
+defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3>;
+defm : X86WriteResPairUnsupported<WriteVecShiftY>;
+defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
+
+defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
+
+defm : PdWriteResXMMPair<WriteVecIMul, [PdFPU0, PdFPMMA], 4>;
+defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
+defm : X86WriteResPairUnsupported<WriteVecIMulY>;
+defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
+
+defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
+defm : X86WriteResPairUnsupported<WritePMULLDY>;
+defm : X86WriteResPairUnsupported<WritePMULLDZ>;
+
+def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]> {
+ let Latency = 4;
+ let ResourceCycles = [2, 1, 2, 1];
+}
+def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
+ VPMACSSDQLrr)>;
+
+defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 2], 9>;
+defm : X86WriteResPairUnsupported<WriteMPSADY>;
+defm : X86WriteResPairUnsupported<WriteMPSADZ>;
+
+defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [], 2>;
+defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [], 2>;
+defm : X86WriteResPairUnsupported<WritePSADBWY>;
+defm : X86WriteResPairUnsupported<WritePSADBWZ>;
+
+defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;
+
+defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 1]>;
+defm : X86WriteResPairUnsupported<WriteShuffleZ>;
+
+defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 4]>;
+defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 4]>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
+
+defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteBlendY>;
+defm : X86WriteResPairUnsupported<WriteBlendZ>;
+
+defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 4]>;
+defm : X86WriteResPairUnsupported<WriteVarBlendY>;
+defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
+
+defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteVecLogicY>;
+defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
+
+defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 2, 1], 4, 2>;
+defm : X86WriteResPairUnsupported<WriteVecTestZ>;
+
+defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>;
+defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;
+
+defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector insert/extract operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [], 2>;
+defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [], 2>;
+
+defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [], 2>;
+
+def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 3;
+}
+def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// SSE42 String instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 14, [1, 2, 1], 7, 1>;
+defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 6, [1, 2, 1], 7, 2>;
+
+defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 15, [1, 2, 6, 4, 1, 1], 27, 1>;
+defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 2, 6, 4, 1, 1], 27, 1>;
+
+////////////////////////////////////////////////////////////////////////////////
+// MOVMSK Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;
+
+defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
+defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
+// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;
+
+defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;
+
+////////////////////////////////////////////////////////////////////////////////
+// AES Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteAESIMC, [PdFPU0, PdFPMMA], 5>;
+defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
+defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [], 3, 1>;
+defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [2, 1], 8, 2>;
+defm : X86WriteResPairUnsupported<WriteFHAddZ>;
+
+defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [], 3, 1>;
+defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WritePHAddY>;
+defm : X86WriteResPairUnsupported<WritePHAddZ>;
+
+def : InstRW<[WritePHAdd], (instrs PHADDDrr, PHSUBDrr,
+ PHADDWrr, PHSUBWrr,
+ PHADDSWrr, PHSUBSWrr,
+ VPHADDDrr, VPHSUBDrr,
+ VPHADDWrr, VPHSUBWrr,
+ VPHADDSWrr, VPHSUBSWrr)>;
+
+def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm,
+ PHADDWrm, PHSUBWrm,
+ PHADDSWrm, PHSUBSWrm,
+ VPHADDDrm, VPHSUBDrm,
+ VPHADDWrm, VPHSUBWrm,
+ VPHADDSWrm, VPHSUBSWrm)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Carry-less multiplication instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [], 5, 1>;
+
+def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
+ let Latency = 13;
+ let NumMicroOps = 6;
+}
+def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// SSE4A instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 3;
+ let ResourceCycles = [1, 4];
+}
+def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// AVX instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 2, 4];
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
+ VBROADCASTSSYrm)>;
+
+def PdWriteVZEROALL : SchedWriteRes<[]> {
+ let Latency = 90;
+ let NumMicroOps = 32;
+}
+def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;
+
+def PdWriteVZEROUPPER : SchedWriteRes<[]> {
+ let Latency = 46;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;
+
+///////////////////////////////////////////////////////////////////////////////
+// SchedWriteVariant definitions.
+///////////////////////////////////////////////////////////////////////////////
+
+def PdWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def PdWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
+]>;
+def : InstRW<[PdWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
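+
+// Illustrative note: ZeroIdiomPredicate matches when both source operands name
+// the same register (e.g. `xorl %eax, %eax`), so the variant above resolves to
+// PdWriteZeroLatency for true zero idioms and falls back to WriteALU otherwise.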
+
+def PdWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
+]>;
+def : InstRW<[PdWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
+ XORPDrr, VXORPDrr,
+ ANDNPSrr, VANDNPSrr,
+ ANDNPDrr, VANDNPDrr)>;
+
+// The VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have a
+// latency of 1, so they are not remapped to PdWriteZeroLatency above.
+
+def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
+
+def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
+ PANDNrr, VPANDNrr)>;
+
+def PdWriteVZeroIdiomALU : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
+ MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PCMPGTBirr,
+ MMX_PCMPGTDirr,
+ MMX_PCMPGTWirr)>;
+
+def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+ PSUBDrr, VPSUBDrr,
+ PSUBQrr, VPSUBQrr,
+ PSUBWrr, VPSUBWrr,
+ PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+// Note that VPCMPGTQrr is listed as a zero-idiom below, but PCMPGTQrr is not!
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // MMX Zero-idioms.
+ DepBreakingClass<[
+ MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
+ MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
+ MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
+
+ // int variants.
+ PXORrr, PANDNrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
+
+ // xmm int variants.
+ VPXORrr, VPANDNrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
+ ], ZeroIdiomPredicate>
+]>;
+
+def : IsDepBreakingFunction<[
+ // GPR
+ DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
+ DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
+
+ // MMX
+ DepBreakingClass<[
+ MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE
+ DepBreakingClass<[
+ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
+ // But not PCMPEQQrr.
+ ], ZeroIdiomPredicate>,
+
+ // AVX
+ DepBreakingClass<[
+ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
+ // But not VPCMPEQQrr.
+ ], ZeroIdiomPredicate>
+]>;
+
+
+} // SchedModel
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 719e71cd25e5..33a6b01546d7 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -48,12 +48,22 @@ def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM
// part of it.
// Reference: Section 21.10 "AMD Bobcat and Jaguar pipeline: Partial register
// access" - Agner Fog's "microarchitecture.pdf".
-def JIntegerPRF : RegisterFile<64, [GR64, CCR]>;
+def JIntegerPRF : RegisterFile<64, [GR64, CCR], [1, 1], [1, 0],
+ 0, // Max moves that can be eliminated per cycle.
+ 1>; // Restrict move elimination to zero regs.
// The Jaguar FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE
// registers. Operations on 256-bit data types are cracked into two COPs.
// Reference: www.realworldtech.com/jaguar/4/
-def JFpuPRF: RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>;
+
+// The PRF in the floating point unit can eliminate a move from an MMX or SSE
+// register that is known to be zero (i.e. it has been zeroed using a zero-idiom
+// dependency breaking instruction, or via VZEROALL).
+// Reference: Section 21.8 "AMD Bobcat and Jaguar pipeline: Dependency-breaking
+// instructions" - Agner Fog's "microarchitecture.pdf"
+def JFpuPRF: RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2], [1, 1, 0],
+ 0, // Max moves that can be eliminated per cycle.
+ 1>; // Restrict move elimination to zero regs.
// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can
// retire up to two macro-ops per cycle.
@@ -93,6 +103,12 @@ def JVALU : ProcResGroup<[JVALU0, JVALU1]>;
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 5>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
@@ -100,7 +116,8 @@ def : ReadAdvance<ReadAfterLd, 3>;
// folded loads.
multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [], int UOps = 1> {
+ int Lat, list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -113,13 +130,14 @@ multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 3);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [], int UOps = 1> {
+ int Lat, list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -132,13 +150,14 @@ multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [2], int UOps = 2> {
+ int Lat, list<int> Res = [2], int UOps = 2,
+ int LoadUOps = 0> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -151,12 +170,13 @@ multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !listconcat([2], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
-// A folded store needs a cycle on the SAGU for the store data.
-def : WriteRes<WriteRMW, [JSAGU]>;
+// A folded store needs a cycle on the SAGU for the store data;
+// most RMW instructions don't need an extra uop.
+defm : X86WriteRes<WriteRMW, [JSAGU], 1, [1], 0>;
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
@@ -164,12 +184,24 @@ def : WriteRes<WriteRMW, [JSAGU]>;
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteADC, [JALU01], 1, [2]>;
-defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
-defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
-defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHG,[JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[JALU01, JSAGU, JLAGU], 4, [1, 1, 1], 2>;
+defm : X86WriteRes<WriteXCHG, [JALU01], 1, [1], 1>;
+
+defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16Imm, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16Reg, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32Imm, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 2>;
+defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
@@ -188,27 +220,37 @@ defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional m
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
-def : WriteRes<WriteBitTest,[JALU01]>;
+
+defm : X86WriteRes<WriteBitTest, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [JALU01,JLAGU], 4, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [JALU01,JLAGU], 4, [1,1], 5>;
+defm : X86WriteRes<WriteBitTestSet, [JALU01], 1, [1], 2>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [JALU01,JLAGU], 4, [1,1], 4>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [JALU01,JLAGU], 4, [1,1], 8>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
// Bit counts.
-defm : JWriteResIntPair<WriteBSF, [JALU01], 5, [4], 8>;
-defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [4], 8>;
+defm : JWriteResIntPair<WriteBSF, [JALU01], 4, [8], 7>;
+defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [8], 8>;
defm : JWriteResIntPair<WritePOPCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteLZCNT, [JALU01], 1>;
-defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2]>;
+defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2], 2>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>;
+defm : JWriteResIntPair<WriteBLS, [JALU01], 2, [2], 2>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
+defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
+defm : JWriteResIntPair<WriteShiftCL, [JALU01], 1>;
+defm : JWriteResIntPair<WriteRotate, [JALU01], 1>;
+defm : JWriteResIntPair<WriteRotateCL, [JALU01], 1>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
@@ -269,8 +311,8 @@ defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
-defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
+defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 2, 2], 1>;
+defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 4, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
@@ -364,21 +406,21 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
-defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
-defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
+defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [4, 4], 3>;
+defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [6, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
-defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
+defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
+defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPU0, JFPA, JALU0], 7, [1,1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPS2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
-defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
+defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPU0, JFPA, JALU0], 7, [1,1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
@@ -423,8 +465,8 @@ defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1],
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
-defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
+defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 2, 2], 1>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 4, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
@@ -462,26 +504,26 @@ defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
-defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
+defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2], 3>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
-defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
+defm : JWriteResFpuPair<WritePHMINPOS, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
-defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
+defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 1], 1>;
defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
-defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
+defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [4, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
@@ -507,8 +549,8 @@ defm : X86WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPA, JALU0], 7, [1, 2, 1, 1], 3>;
-defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPA, JALU0], 8, [1, 2, 1, 1], 3>;
+defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPU0, JFPA, JALU0], 7, [2, 2, 1, 1, 1], 3>;
+defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPU0, JFPA, JALU0], 8, [2, 2, 1, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpEStrI, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
@@ -527,7 +569,7 @@ def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
defm : JWriteResFpuPair<WriteAESIMC, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
-defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1, 1], 2>;
+defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU01, JVALU, JFPU0, JVIMUL], 3, [1,1,1,1], 2>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
@@ -559,13 +601,17 @@ def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
+def JWriteVecExtractF128: SchedWriteRes<[JFPU01, JFPX]>;
+def : InstRW<[JWriteVecExtractF128], (instrs VEXTRACTF128rr)>;
+
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
let NumMicroOps = 2;
}
-def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
- VBROADCASTSSYrm)>;
+def : InstRW<[JWriteVBROADCASTYLd], (instrs VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VBROADCASTF128)>;
def JWriteJVZEROALL: SchedWriteRes<[]> {
let Latency = 90;
@@ -587,6 +633,10 @@ def JWriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
}
+def JWriteZeroIdiomYmm : SchedWriteRes<[JFPU01, JFPX]> {
+ let NumMicroOps = 2;
+}
+
// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
@@ -598,54 +648,73 @@ def JWriteZeroLatency : SchedWriteRes<[]> {
def JWriteZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
+ SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[JWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
XOR32rr, XOR64rr)>;
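The zero-idiom variants above key off ZeroIdiomPredicate, which matches register-register forms whose two source operands name the same register, so the result is always zero and carries no true dependency on the old value. A minimal C++ sketch of that check, assuming a simplified instruction record rather than LLVM's real MCInstPredicate machinery:

struct Inst {
  unsigned Opcode;   // stand-in for an opcode such as XOR32rr or SUB32rr
  unsigned SrcReg0;
  unsigned SrcReg1;
};

// Only the opcodes listed in the InstRW definitions are candidates; for those,
// a zero idiom is simply "both sources are the same register", e.g.
// xor %eax, %eax. Such an instruction always produces zero, so it need not
// wait on the previous register value and can be given zero latency.
bool isZeroIdiom(const Inst &I) {
  return I.SrcReg0 == I.SrcReg1;
}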
def JWriteFZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
+ SchedVar<NoSchedPred, [WriteFLogic]>
]>;
def : InstRW<[JWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr,
ANDNPSrr, VANDNPSrr,
ANDNPDrr, VANDNPDrr)>;
+def JWriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroIdiomYmm]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[JWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
+ VANDNPSYrr, VANDNPDYrr)>;
+
def JWriteVZeroIdiomLogic : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
+ SchedVar<NoSchedPred, [WriteVecLogic]>
]>;
def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
def JWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
]>;
def : InstRW<[JWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
PANDNrr, VPANDNrr)>;
def JWriteVZeroIdiomALU : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
+ SchedVar<NoSchedPred, [WriteVecALU]>
]>;
def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PSUBSBirr, MMX_PSUBSWirr,
+ MMX_PSUBUSBirr, MMX_PSUBUSWirr,
MMX_PCMPGTBirr, MMX_PCMPGTDirr,
MMX_PCMPGTWirr)>;
def JWriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
+ SchedVar<NoSchedPred, [WriteVecALUX]>
]>;
def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
PSUBDrr, VPSUBDrr,
PSUBQrr, VPSUBQrr,
PSUBWrr, VPSUBWrr,
+ PSUBSBrr, VPSUBSBrr,
+ PSUBSWrr, VPSUBSWrr,
+ PSUBUSBrr, VPSUBUSBrr,
+ PSUBUSWrr, VPSUBUSWrr,
PCMPGTBrr, VPCMPGTBrr,
PCMPGTDrr, VPCMPGTDrr,
PCMPGTQrr, VPCMPGTQrr,
PCMPGTWrr, VPCMPGTWrr)>;
+def JWriteVPERM2F128 : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomVPERMPredicate>, [JWriteZeroIdiomYmm]>,
+ SchedVar<NoSchedPred, [WriteFShuffle256]>
+]>;
+def : InstRW<[JWriteVPERM2F128], (instrs VPERM2F128rr)>;
+
// This write is used for slow LEA instructions.
def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> {
let Latency = 2;
@@ -666,8 +735,8 @@ def JSlowLEAPredicate : MCSchedPredicate<
>;
def JWriteLEA : SchedWriteVariant<[
- SchedVar<JSlowLEAPredicate, [JWrite3OpsLEA]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteLEA]>
+ SchedVar<JSlowLEAPredicate, [JWrite3OpsLEA]>,
+ SchedVar<NoSchedPred, [WriteLEA]>
]>;
def : InstRW<[JWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
@@ -679,4 +748,91 @@ def JSlowLEA16r : SchedWriteRes<[JALU01]> {
def : InstRW<[JSlowLEA16r], (instrs LEA16r)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // MMX Zero-idioms.
+ DepBreakingClass<[
+ MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
+ MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
+ MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
+
+ // int variants.
+ PXORrr, PANDNrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
+
+ // xmm int variants.
+ VPXORrr, VPANDNrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
+ ], ZeroIdiomPredicate>,
+
+ DepBreakingClass<[ VPERM2F128rr ], ZeroIdiomVPERMPredicate>
+]>;
+
+def : IsDepBreakingFunction<[
+ // GPR
+ DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
+ DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
+
+ // MMX
+ DepBreakingClass<[
+ MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE
+ DepBreakingClass<[
+ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX
+ DepBreakingClass<[
+ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
+ ], ZeroIdiomPredicate>
+]>;
+
+def : IsOptimizableRegisterMove<[
+ InstructionEquivalenceClass<[
+ // GPR variants.
+ MOV32rr, MOV64rr,
+
+ // MMX variants.
+ MMX_MOVQ64rr,
+
+ // SSE variants.
+ MOVAPSrr, MOVUPSrr,
+ MOVAPDrr, MOVUPDrr,
+ MOVDQArr, MOVDQUrr,
+
+ // AVX variants.
+ VMOVAPSrr, VMOVUPSrr,
+ VMOVAPDrr, VMOVUPDrr,
+ VMOVDQArr, VMOVDQUrr
+ ], TruePred >
+]>;
+
} // SchedModel
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td b/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
index b1e843013707..fcaff7cf810f 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -49,6 +49,9 @@ def SLMFPDivider : ProcResource<1>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+def : ReadAdvance<ReadAfterVecLd, 3>;
+def : ReadAdvance<ReadAfterVecXLd, 3>;
+def : ReadAdvance<ReadAfterVecYLd, 3>;
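These entries extend the existing ReadAfterLd rule to the vector-load reads: the register operand needn't be available until three cycles after the load, so a producer with latency L only stalls the consumer by max(0, L - 3) cycles. A rough sketch of that computation, using a hypothetical helper rather than any LLVM API:

#include <algorithm>

// Extra cycles a consumer waits on an operand produced with latency
// `DefLatency` when the consuming read carries ReadAdvance<..., AdvanceCycles>
// (3 for ReadAfterLd / ReadAfterVec*Ld above).
int operandStallCycles(int DefLatency, int AdvanceCycles) {
  return std::max(0, DefLatency - AdvanceCycles);
}

// e.g. operandStallCycles(3, 3) == 0: a 3-cycle load feeding such a read adds
// no extra latency in the modelled schedule.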
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
@@ -95,13 +98,28 @@ def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
-defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
+
+defm : SLMWriteResPair<WriteIMul8, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16Reg, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32Reg, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 3>;
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHG, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1, 2], 2>;
+defm : X86WriteRes<WriteXCHG, [SLM_IEC_RSV01], 1, [1], 1>;
-defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteShiftCL, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteRotate, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteRotateCL, [SLM_IEC_RSV0], 1>;
defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
@@ -119,8 +137,13 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
// FIXME Latency and NumMicroOps?
let ResourceCycles = [2,1];
}
-def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
-def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
+defm : X86WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestSet, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1], 1>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -134,8 +157,9 @@ defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
+defm : X86WriteResPairUnsupported<WriteBLS>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 7184b850a195..a866f843106b 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -87,9 +87,14 @@ def ZnMultiplier : ProcResource<1>;
// Integer division issued on ALU2.
def ZnDivider : ProcResource<1>;
-// 4 Cycles load-to use Latency is captured
+// 4 Cycles integer load-to-use Latency is captured
def : ReadAdvance<ReadAfterLd, 4>;
+// 8 Cycles vector load-to-use Latency is captured
+def : ReadAdvance<ReadAfterVecLd, 8>;
+def : ReadAdvance<ReadAfterVecXLd, 8>;
+def : ReadAdvance<ReadAfterVecYLd, 8>;
+
// The Integer PRF for Zen is 168 entries, and it holds the architectural and
// speculative version of the 64-bit integer registers.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
@@ -177,13 +182,28 @@ def : WriteRes<WriteZero, []>;
def : WriteRes<WriteLEA, [ZnALU]>;
defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
-defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+
+defm : ZnWriteResPair<WriteIMul8, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16Imm, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16Reg, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32Imm, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32Reg, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+//defm : ZnWriteResPair<WriteIMul64Imm, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+//defm : ZnWriteResPair<WriteIMul64Reg, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
+defm : X86WriteRes<WriteCMPXCHG, [ZnALU], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[ZnALU,ZnAGU], 8, [1,1], 5>;
+defm : X86WriteRes<WriteXCHG, [ZnALU], 1, [2], 2>;
-defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteShiftCL, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteRotate, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteRotateCL, [ZnALU], 1>;
defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteSHDrrcl>;
@@ -198,7 +218,13 @@ defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>;
def : WriteRes<WriteSETCC, [ZnALU]>;
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
-def : WriteRes<WriteBitTest,[ZnALU]>;
+
+defm : X86WriteRes<WriteBitTest, [ZnALU], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [ZnALU], 2, [1], 2>;
+//defm : X86WriteRes<WriteBitTestSetImmLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
+//defm : X86WriteRes<WriteBitTestSetRegLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
// Bit counts.
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
@@ -210,9 +236,10 @@ defm : ZnWriteResPair<WritePOPCNT, [ZnALU], 1>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
+//defm : ZnWriteResPair<WriteBLS, [ZnALU], 2>;
+defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
// IDIV
defm : ZnWriteResPair<WriteDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
@@ -492,21 +519,13 @@ def : SchedAlias<WriteSTMXCSR, ZnWriteMicrocoded>;
//-- Move instructions --//
// MOV.
// r16,m.
-def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>;
+def : InstRW<[WriteALULd, ReadAfterLd], (instrs MOV16rm)>;
// MOVSX, MOVZX.
// r,m.
def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
// XCHG.
-// r,r.
-def ZnWriteXCHG : SchedWriteRes<[ZnALU]> {
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-
-def : InstRW<[ZnWriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
-
// r,m.
def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> {
let Latency = 5;
@@ -522,7 +541,7 @@ def ZnWritePop16r : SchedWriteRes<[ZnAGU]>{
let Latency = 5;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWritePop16r], (instregex "POP16rmm")>;
+def : InstRW<[ZnWritePop16r], (instrs POP16rmm)>;
def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
@@ -582,45 +601,51 @@ def : InstRW<[WriteALULd],
def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 3;
}
-def : InstRW<[ZnWriteMul16], (instrs IMUL16r, MUL16r)>;
-def : InstRW<[ZnWriteMul16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul16], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul16, ZnWriteMul16>;
+def : SchedAlias<WriteIMul16Imm, ZnWriteMul16>; // TODO: is this right?
+def : SchedAlias<WriteIMul16Reg, ZnWriteMul16>; // TODO: is this right?
+def : SchedAlias<WriteIMul16ImmLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul16RegLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
// m16.
def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instrs IMUL16m, MUL16m)>;
+def : SchedAlias<WriteIMul16Ld, ZnWriteMul16Ld>;
// r32.
def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 3;
}
-def : InstRW<[ZnWriteMul32], (instrs IMUL32r, MUL32r)>;
-def : InstRW<[ZnWriteMul32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul32], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul32, ZnWriteMul32>;
+def : SchedAlias<WriteIMul32Imm, ZnWriteMul32>; // TODO: is this right?
+def : SchedAlias<WriteIMul32Reg, ZnWriteMul32>; // TODO: is this right?
+def : SchedAlias<WriteIMul32ImmLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul32RegLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
// m32.
def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instrs IMUL32m, MUL32m)>;
+def : SchedAlias<WriteIMul32Ld, ZnWriteMul32Ld>;
// r64.
def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 4;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteMul64], (instrs IMUL64r, MUL64r)>;
-def : InstRW<[ZnWriteMul64], (instrs IMUL64rr, IMUL64rri8, IMUL64rri32)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul64], (instrs IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul64, ZnWriteMul64>;
+def : SchedAlias<WriteIMul64Imm, ZnWriteMul64>; // TODO: is this right?
+def : SchedAlias<WriteIMul64Reg, ZnWriteMul64>; // TODO: is this right?
+def : SchedAlias<WriteIMul64ImmLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul64RegLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
// m64.
def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 9;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instrs IMUL64m, MUL64m)>;
+def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
// MULX.
// r32,r32,r32.
@@ -696,31 +721,21 @@ def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> {
let Latency = 6;
}
-// BT.
-// m,i.
-def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
-
// BTR BTS BTC.
-// r,r,i.
-def ZnWriteBTRSC : SchedWriteRes<[ZnALU]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-def : InstRW<[ZnWriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
-
// m,r,i.
def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> {
let Latency = 6;
let NumMicroOps = 2;
}
// m,r,i.
-def : InstRW<[ZnWriteBTRSCm], (instregex "BT(R|S|C)(16|32|64)m(r|i8)")>;
+def : SchedAlias<WriteBitTestSetImmRMW, ZnWriteBTRSCm>;
+def : SchedAlias<WriteBitTestSetRegRMW, ZnWriteBTRSCm>;
// BLSI BLSMSK BLSR.
// r,r.
-def : InstRW<[ZnWriteALULat2], (instregex "BLS(I|MSK|R)(32|64)rr")>;
+def : SchedAlias<WriteBLS, ZnWriteALULat2>;
// r,m.
-def : InstRW<[ZnWriteALULat2Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;
+def : SchedAlias<WriteBLSLd, ZnWriteALULat2Ld>;
// CLD STD.
def : InstRW<[WriteALU], (instrs STD, CLD)>;
@@ -750,13 +765,6 @@ def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;
def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
//-- Misc instructions --//
-// CMPXCHG.
-def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> {
- let Latency = 8;
- let NumMicroOps = 5;
-}
-def : InstRW<[ZnWriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;
-
// CMPXCHG8B.
def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> {
let NumMicroOps = 18;
@@ -782,10 +790,10 @@ def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
def : InstRW<[WriteMicrocoded], (instrs RDPMC)>;
// RDRAND.
-def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;
+def : InstRW<[WriteMicrocoded], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
// XGETBV.
-def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;
+def : InstRW<[WriteMicrocoded], (instrs XGETBV)>;
//-- String instructions --//
// CMPS.
@@ -807,6 +815,8 @@ def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
// XADD.
+def ZnXADD : SchedWriteRes<[ZnALU]>;
+def : InstRW<[ZnXADD], (instregex "XADD(8|16|32|64)rr")>;
def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;
//=== Floating Point x87 Instructions ===//
@@ -821,16 +831,16 @@ def ZnWriteSTr: SchedWriteRes<[ZnFPU23]> {
// LD_F.
// r.
-def : InstRW<[ZnWriteFLDr], (instregex "LD_Frr")>;
+def : InstRW<[ZnWriteFLDr], (instrs LD_Frr)>;
// m.
def ZnWriteLD_F80m : SchedWriteRes<[ZnAGU, ZnFPU13]> {
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteLD_F80m], (instregex "LD_F80m")>;
+def : InstRW<[ZnWriteLD_F80m], (instrs LD_F80m)>;
// FBLD.
-def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;
+def : InstRW<[WriteMicrocoded], (instrs FBLDm)>;
// FST(P).
// r.
@@ -840,11 +850,11 @@ def : InstRW<[ZnWriteSTr], (instregex "ST_(F|FP)rr")>;
def ZnWriteST_FP80m : SchedWriteRes<[ZnAGU, ZnFPU23]> {
let Latency = 5;
}
-def : InstRW<[ZnWriteST_FP80m], (instregex "ST_FP80m")>;
+def : InstRW<[ZnWriteST_FP80m], (instrs ST_FP80m)>;
// FBSTP.
// m80.
-def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;
+def : InstRW<[WriteMicrocoded], (instrs FBSTPm)>;
def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>;
@@ -901,10 +911,10 @@ def : InstRW<[ZnWriteFPU3], (instrs FINCSTP, FDECSTP)>;
def : InstRW<[ZnWriteFPU3], (instregex "FFREE")>;
// FNSAVE.
-def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;
+def : InstRW<[WriteMicrocoded], (instrs FSAVEm)>;
// FRSTOR.
-def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;
+def : InstRW<[WriteMicrocoded], (instrs FRSTORm)>;
//-- Arithmetic instructions --//
@@ -1401,46 +1411,46 @@ def : InstRW<[ZnWriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
// SHA1MSG2
// x,x.
def ZnWriteSHA1MSG2r : SchedWriteRes<[ZnFPU12]> ;
-def : InstRW<[ZnWriteSHA1MSG2r], (instregex "SHA1MSG2rr")>;
+def : InstRW<[ZnWriteSHA1MSG2r], (instrs SHA1MSG2rr)>;
// x,m.
def ZnWriteSHA1MSG2Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteSHA1MSG2Ld], (instregex "SHA1MSG2rm")>;
+def : InstRW<[ZnWriteSHA1MSG2Ld], (instrs SHA1MSG2rm)>;
// SHA1NEXTE
// x,x.
def ZnWriteSHA1NEXTEr : SchedWriteRes<[ZnFPU1]> ;
-def : InstRW<[ZnWriteSHA1NEXTEr], (instregex "SHA1NEXTErr")>;
+def : InstRW<[ZnWriteSHA1NEXTEr], (instrs SHA1NEXTErr)>;
// x,m.
def ZnWriteSHA1NEXTELd : SchedWriteRes<[ZnAGU, ZnFPU1]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteSHA1NEXTELd], (instregex "SHA1NEXTErm")>;
+def : InstRW<[ZnWriteSHA1NEXTELd], (instrs SHA1NEXTErm)>;
// SHA1RNDS4
// x,x.
def ZnWriteSHA1RNDS4r : SchedWriteRes<[ZnFPU1]> {
let Latency = 6;
}
-def : InstRW<[ZnWriteSHA1RNDS4r], (instregex "SHA1RNDS4rr")>;
+def : InstRW<[ZnWriteSHA1RNDS4r], (instrs SHA1RNDS4rri)>;
// x,m.
def ZnWriteSHA1RNDS4Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
let Latency = 13;
}
-def : InstRW<[ZnWriteSHA1RNDS4Ld], (instregex "SHA1RNDS4rm")>;
+def : InstRW<[ZnWriteSHA1RNDS4Ld], (instrs SHA1RNDS4rmi)>;
// SHA256RNDS2
// x,x.
def ZnWriteSHA256RNDS2r : SchedWriteRes<[ZnFPU1]> {
let Latency = 4;
}
-def : InstRW<[ZnWriteSHA256RNDS2r], (instregex "SHA256RNDS2rr")>;
+def : InstRW<[ZnWriteSHA256RNDS2r], (instrs SHA256RNDS2rr)>;
// x,m.
def ZnWriteSHA256RNDS2Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
let Latency = 11;
}
-def : InstRW<[ZnWriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
+def : InstRW<[ZnWriteSHA256RNDS2Ld], (instrs SHA256RNDS2rm)>;
//-- Arithmetic instructions --//
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index e131f1a1e4bd..008a9ec2ba3c 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -170,10 +170,11 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
InFlag = Chain.getValue(1);
}
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
+ bool Use64BitRegs = Subtarget.isTarget64BitLP64();
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
@@ -249,20 +250,21 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
if (Repeats.BytesLeft() > 0 &&
DAG.getMachineFunction().getFunction().optForMinSize()) {
- // When agressively optimizing for size, avoid generating the code to
+ // When aggressively optimizing for size, avoid generating the code to
// handle BytesLeft.
Repeats.AVT = MVT::i8;
}
}
+ bool Use64BitRegs = Subtarget.isTarget64BitLP64();
SDValue InFlag;
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RSI : X86::ESI,
Src, InFlag);
InFlag = Chain.getValue(1);
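The switch from is64Bit() to isTarget64BitLP64() is what keeps this rep-string lowering correct on the x32 ABI, where the CPU runs in 64-bit mode but pointers are only 32 bits wide, so the 32-bit count/pointer registers must be used. A hedged illustration of the decision; the helper below is illustrative only and not part of the LLVM API:

// Choose the register width for a rep stos/movs expansion. Plain 64-bit
// (LP64) targets use RCX/RDI/RSI; x32 (64-bit mode with 32-bit pointers) and
// 32-bit targets must use ECX/EDI/ESI.
enum class RepRegWidth { Bits32, Bits64 };

RepRegWidth pickRepRegWidth(bool In64BitMode, bool IsLP64) {
  return (In64BitMode && IsLP64) ? RepRegWidth::Bits64 : RepRegWidth::Bits32;
}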
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index c7ddf93f8e85..720be8afa62c 100644
--- a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -112,11 +112,10 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
return true;
}
-void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+void DecodePSHUFBMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask) {
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
// The shuffle mask requires a byte vector.
@@ -125,7 +124,7 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / 8;
assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
"Unexpected number of vector elements.");
@@ -151,12 +150,10 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
}
}
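For context on the decode being reworked here: PSHUFB's per-byte rule is that a set high bit zeroes the destination byte, while the low four bits select a source byte within the same 128-bit lane, and the new Width parameter fixes how many such bytes are decoded. A standalone sketch of that semantics (the -2 zero sentinel mirrors the X86 shuffle-decode convention and is an assumption here):

#include <cstdint>
#include <vector>

// Turn raw PSHUFB control bytes into a shuffle mask of source indices;
// a negative entry marks a destination byte that is zeroed.
std::vector<int> decodePshufbBytes(const std::vector<uint8_t> &MaskBytes) {
  std::vector<int> ShuffleMask;
  for (unsigned I = 0, E = MaskBytes.size(); I != E; ++I) {
    const uint8_t B = MaskBytes[I];
    if (B & 0x80) {
      ShuffleMask.push_back(-2); // high bit set: element is zeroed
      continue;
    }
    const unsigned LaneBase = (I / 16) * 16; // never crosses a 128-bit lane
    ShuffleMask.push_back(static_cast<int>(LaneBase + (B & 0x0F)));
  }
  return ShuffleMask;
}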
-void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
@@ -166,7 +163,7 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
unsigned NumEltsPerLane = 128 / ElSize;
assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
"Unexpected number of vector elements.");
@@ -189,11 +186,13 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
}
void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
+ unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
(void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256) && "Unexpected vector size.");
+ assert((MaskTySize == 128 || MaskTySize == 256) &&
+ Width >= MaskTySize && "Unexpected vector size.");
// The shuffle mask requires elements the same size as the target.
APInt UndefElts;
@@ -201,7 +200,7 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
unsigned NumEltsPerLane = 128 / ElSize;
assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
"Unexpected number of vector elements.");
@@ -242,9 +241,12 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
}
}
-void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
- assert(C->getType()->getPrimitiveSizeInBits() == 128 &&
- "Unexpected vector size.");
+void DecodeVPPERMMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+ (void)MaskTySize;
+ assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size.");
// The shuffle mask requires a byte vector.
APInt UndefElts;
@@ -252,7 +254,7 @@ void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / 8;
assert(NumElts == 16 && "Unexpected number of vector elements.");
for (unsigned i = 0; i != NumElts; ++i) {
@@ -291,12 +293,10 @@ void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
}
}
-void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
"Unexpected vector element size.");
@@ -307,7 +307,7 @@ void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
for (unsigned i = 0; i != NumElts; ++i) {
if (UndefElts[i]) {
@@ -319,12 +319,10 @@ void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
}
}
-void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
"Unexpected vector element size.");
@@ -335,7 +333,7 @@ void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
for (unsigned i = 0; i != NumElts; ++i) {
if (UndefElts[i]) {
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
index b703cbbd2b29..b08c31935d28 100644
--- a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
+++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
@@ -26,25 +26,28 @@ class Constant;
class MVT;
/// Decode a PSHUFB mask from an IR-level vector constant.
-void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+void DecodePSHUFBMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILP variable mask from an IR-level vector constant.
-void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILP2 variable mask from an IR-level vector constant.
void DecodeVPERMIL2PMask(const Constant *C, unsigned MatchImm, unsigned ElSize,
+ unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPPERM variable mask from an IR-level vector constant.
-void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+void DecodeVPPERMMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 078fe1598f13..a729161a1beb 100644
--- a/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -61,7 +61,7 @@
using namespace llvm;
-#define PASS_KEY "x86-speculative-load-hardening"
+#define PASS_KEY "x86-slh"
#define DEBUG_TYPE PASS_KEY
STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
@@ -75,6 +75,11 @@ STATISTIC(NumCallsOrJumpsHardened,
STATISTIC(NumInstsInserted, "Number of instructions inserted");
STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
+static cl::opt<bool> EnableSpeculativeLoadHardening(
+ "x86-speculative-load-hardening",
+ cl::desc("Force enable speculative load hardening"), cl::init(false),
+ cl::Hidden);
+
static cl::opt<bool> HardenEdgesWithLFENCE(
PASS_KEY "-lfence",
cl::desc(
@@ -114,12 +119,6 @@ static cl::opt<bool> HardenIndirectCallsAndJumps(
"mitigate Spectre v1.2 style attacks."),
cl::init(true), cl::Hidden);
-namespace llvm {
-
-void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
-
-} // end namespace llvm
-
namespace {
class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
@@ -179,6 +178,9 @@ private:
void unfoldCallAndJumpLoads(MachineFunction &MF);
+ SmallVector<MachineInstr *, 16>
+ tracePredStateThroughIndirectBranches(MachineFunction &MF);
+
void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
unsigned saveEFLAGS(MachineBasicBlock &MBB,
@@ -401,6 +403,12 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
<< " **********\n");
+ // Only run if this pass is forced enabled or we detect the relevant function
+ // attribute requesting SLH.
+ if (!EnableSpeculativeLoadHardening &&
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return false;
+
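With this gate in place the pass runs either when the renamed -x86-speculative-load-hardening flag forces it on, or when the function itself requests hardening. A usage sketch at the source level, assuming clang's speculative_load_hardening attribute lowers to the IR attribute checked above:

// Request hardening for just this function; the attribute becomes the
// speculative_load_hardening IR attribute that the pass now looks for.
__attribute__((speculative_load_hardening))
int guarded_lookup(const int *table, int idx) {
  return table[idx]; // loads here are masked when misspeculation is detected
}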
Subtarget = &MF.getSubtarget<X86Subtarget>();
MRI = &MF.getRegInfo();
TII = Subtarget->getInstrInfo();
@@ -522,11 +530,16 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
}
}
- // If we are going to harden calls and jumps we need to unfold their memory
- // operands.
- if (HardenIndirectCallsAndJumps)
+ if (HardenIndirectCallsAndJumps) {
+ // If we are going to harden calls and jumps we need to unfold their memory
+ // operands.
unfoldCallAndJumpLoads(MF);
+ // Then we trace predicate state through the indirect branches.
+ auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
+ CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
+ }
+
// Now that we have the predicate state available at the start of each block
// in the CFG, trace it through each block, hardening vulnerable instructions
// as we go.
@@ -809,7 +822,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
"split above!");
// Sort and unique the codes to minimize them.
- llvm::sort(UncondCodeSeq.begin(), UncondCodeSeq.end());
+ llvm::sort(UncondCodeSeq);
UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
UncondCodeSeq.end());
@@ -925,6 +938,265 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
}
}
+/// Trace the predicate state through indirect branches, instrumenting them to
+/// poison the state if a target is reached that does not match the expected
+/// target.
+///
+/// This is designed to mitigate Spectre variant 1 attacks where an indirect
+/// branch is trained to predict a particular target and then mispredicts that
+/// target in a way that can leak data. Despite using an indirect branch, this
+/// is really a variant 1 style attack: it does not steer execution to an
+/// arbitrary or attacker controlled address, and it does not require any
+/// special code executing next to the victim. This attack can also be mitigated
+/// through retpolines, but those require either replacing indirect branches
+/// with conditional direct branches or lowering them through a device that
+/// blocks speculation. This mitigation can replace these retpoline-style
+/// mitigations for jump tables and other indirect branches within a function
+/// when variant 2 isn't a risk while allowing limited speculation. Indirect
+/// calls, however, cannot be mitigated through this technique without changing
+/// the ABI in a fundamental way.
+SmallVector<MachineInstr *, 16>
+X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
+ MachineFunction &MF) {
+ // We use the SSAUpdater to insert PHI nodes for the target addresses of
+ // indirect branches. We don't actually need the full power of the SSA updater
+ // in this particular case as we always have immediately available values, but
+ // this avoids us having to re-implement the PHI construction logic.
+ MachineSSAUpdater TargetAddrSSA(MF);
+ TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
+
+ // Track which blocks were terminated with an indirect branch.
+ SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
+
+ // We need to know what blocks end up reached via indirect branches. We
+ // expect this to be a subset of those whose address is taken and so track it
+ // directly via the CFG.
+ SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
+
+ // Walk all the blocks which end in an indirect branch and make the
+ // target address available.
+ for (MachineBasicBlock &MBB : MF) {
+ // Find the last terminator.
+ auto MII = MBB.instr_rbegin();
+ while (MII != MBB.instr_rend() && MII->isDebugInstr())
+ ++MII;
+ if (MII == MBB.instr_rend())
+ continue;
+ MachineInstr &TI = *MII;
+ if (!TI.isTerminator() || !TI.isBranch())
+ // No terminator or non-branch terminator.
+ continue;
+
+ unsigned TargetReg;
+
+ switch (TI.getOpcode()) {
+ default:
+ // Direct branch or conditional branch (leading to fallthrough).
+ continue;
+
+ case X86::FARJMP16m:
+ case X86::FARJMP32m:
+ case X86::FARJMP64:
+ // We cannot mitigate far jumps or calls, but we also don't expect them
+ // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
+ continue;
+
+ case X86::JMP16m:
+ case X86::JMP16m_NT:
+ case X86::JMP32m:
+ case X86::JMP32m_NT:
+ case X86::JMP64m:
+ case X86::JMP64m_NT:
+ // Mostly as documentation.
+ report_fatal_error("Memory operand jumps should have been unfolded!");
+
+ case X86::JMP16r:
+ report_fatal_error(
+ "Support for 16-bit indirect branches is not implemented.");
+ case X86::JMP32r:
+ report_fatal_error(
+ "Support for 32-bit indirect branches is not implemented.");
+
+ case X86::JMP64r:
+ TargetReg = TI.getOperand(0).getReg();
+ }
+
+ // We have definitely found an indirect branch. Verify that there are no
+ // preceding conditional branches as we don't yet support that.
+ if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
+ return !OtherTI.isDebugInstr() && &OtherTI != &TI;
+ })) {
+ LLVM_DEBUG({
+ dbgs() << "ERROR: Found other terminators in a block with an indirect "
+ "branch! This is not yet supported! Terminator sequence:\n";
+ for (MachineInstr &MI : MBB.terminators()) {
+ MI.dump();
+ dbgs() << '\n';
+ }
+ });
+ report_fatal_error("Unimplemented terminator sequence!");
+ }
+
+ // Make the target register an available value for this block.
+ TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
+ IndirectTerminatedMBBs.insert(&MBB);
+
+ // Add all the successors to our target candidates.
+ for (MachineBasicBlock *Succ : MBB.successors())
+ IndirectTargetMBBs.insert(Succ);
+ }
+
+ // Keep track of the cmov instructions we insert so we can return them.
+ SmallVector<MachineInstr *, 16> CMovs;
+
+ // If we didn't find any indirect branches with targets, nothing to do here.
+ if (IndirectTargetMBBs.empty())
+ return CMovs;
+
+ // We found indirect branches and targets that need to be instrumented to
+ // harden loads within them. Walk the blocks of the function (to get a stable
+ // ordering) and instrument each target of an indirect branch.
+ for (MachineBasicBlock &MBB : MF) {
+ // Skip the blocks that aren't candidate targets.
+ if (!IndirectTargetMBBs.count(&MBB))
+ continue;
+
+ // We don't expect EH pads to ever be reached via an indirect branch. If
+ // this is desired for some reason, we could simply skip them here rather
+ // than asserting.
+ assert(!MBB.isEHPad() &&
+ "Unexpected EH pad as target of an indirect branch!");
+
+ // We should never end up threading EFLAGS into a block to harden
+ // conditional jumps as there would be an additional successor via the
+ // indirect branch. As a consequence, all such edges would be split before
+ // reaching here, and the inserted block will handle the EFLAGS-based
+ // hardening.
+ assert(!MBB.isLiveIn(X86::EFLAGS) &&
+ "Cannot check within a block that already has live-in EFLAGS!");
+
+ // We can't handle having non-indirect edges into this block unless this is
+ // the only successor and we can synthesize the necessary target address.
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ // If we've already handled this by extracting the target directly,
+ // nothing to do.
+ if (IndirectTerminatedMBBs.count(Pred))
+ continue;
+
+ // Otherwise, we have to be the only successor. We generally expect this
+ // to be true as conditional branches should have had a critical edge
+ // split already. We don't however need to worry about EH pad successors
+ // as they'll happily ignore the target and their hardening strategy is
+ // resilient to all ways in which they could be reached speculatively.
+ if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
+ return Succ->isEHPad() || Succ == &MBB;
+ })) {
+ LLVM_DEBUG({
+ dbgs() << "ERROR: Found conditional entry to target of indirect "
+ "branch!\n";
+ Pred->dump();
+ MBB.dump();
+ });
+ report_fatal_error("Cannot harden a conditional entry to a target of "
+ "an indirect branch!");
+ }
+
+ // Now we need to compute the address of this block and install it as a
+ // synthetic target in the predecessor. We do this at the bottom of the
+ // predecessor.
+ auto InsertPt = Pred->getFirstTerminator();
+ unsigned TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // Directly materialize it into an immediate.
+ auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
+ TII->get(X86::MOV64ri32), TargetReg)
+ .addMBB(&MBB);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
+ dbgs() << "\n");
+ } else {
+ auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
+ TargetReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addMBB(&MBB)
+ .addReg(/*Segment*/ 0);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
+ dbgs() << "\n");
+ }
+ // And make this available.
+ TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
+ }
+
+ // Materialize the needed SSA value of the target. Note that we need the
+ // middle of the block as this block might at the bottom have an indirect
+ // branch back to itself. We can do this here because at this point, every
+ // predecessor of this block has an available value. This is basically just
+ // automating the construction of a PHI node for this target.
+ unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
+
+ // Insert a comparison of the incoming target register with this block's
+ // address. This also requires us to mark the block as having its address
+ // taken explicitly.
+ MBB.setHasAddressTaken();
+ auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // Check directly against a relocated immediate when we can.
+ auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
+ .addReg(TargetReg, RegState::Kill)
+ .addMBB(&MBB);
+ ++NumInstsInserted;
+ (void)CheckI;
+ LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+ } else {
+ // Otherwise compute the address into a register first.
+ unsigned AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ auto AddrI =
+ BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addMBB(&MBB)
+ .addReg(/*Segment*/ 0);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
+ auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
+ .addReg(TargetReg, RegState::Kill)
+ .addReg(AddrReg, RegState::Kill);
+ ++NumInstsInserted;
+ (void)CheckI;
+ LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+ }
+
+ // Now cmov over the predicate if the comparison wasn't equal.
+ int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+ auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+ unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ auto CMovI =
+ BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
+ .addReg(PS->InitialReg)
+ .addReg(PS->PoisonReg);
+ CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+ ++NumInstsInserted;
+ LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+ CMovs.push_back(&*CMovI);
+
+ // And put the new value into the available values for SSA form of our
+ // predicate state.
+ PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
+ }
+
+ // Return all the newly inserted cmov instructions of the predicate state.
+ return CMovs;
+}
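Behaviourally, the cmp/cmov sequence inserted at each indirect-branch target compares the address the predecessor actually jumped with against this block's own address and poisons the predicate state on a mismatch. A compact model of that computation; the pass of course emits MachineInstrs (CMP64ri32/CMP64rr plus a CMOVNE), not source code:

#include <cstdint>

// PredState is all-zeros on the architecturally correct path and all-ones
// once misspeculation has been detected; later loads are masked with it.
uint64_t checkIndirectTarget(uint64_t PredState, uint64_t JumpedToAddr,
                             uint64_t ThisBlockAddr) {
  const uint64_t Poison = ~uint64_t(0);
  // cmovne: keep the incoming state when the target matches, else poison.
  return (JumpedToAddr == ThisBlockAddr) ? PredState : Poison;
}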
+
/// Returns true if the instruction has no behavior (specified or otherwise)
/// that is based on the value of any of its register operands
///
@@ -1498,13 +1770,6 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
// pass specifically so that we have the complete set of instructions for
// which we will do post-load hardening and can defer it in certain
// circumstances.
- //
- // FIXME: This could probably be made even more effective by doing it
- // across the entire function. Rather than just walking the flat list
- // backwards here, we could walk the function in PO and each block bottom
- // up, allowing us to in some cases sink hardening across block blocks. As
- // long as the in-block predicate state is used at the eventual hardening
- // site, this remains safe.
for (MachineInstr &MI : MBB) {
if (HardenLoads) {
// We cannot both require hardening the def of a load and its address.
@@ -1586,8 +1851,8 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
}
// Otherwise we have a call. We need to handle transferring the predicate
- // state into a call and recovering it after the call returns unless this
- // is a tail call.
+ // state into a call and recovering it after the call returns (unless this
+ // is a tail call).
assert(MI.isCall() && "Should only reach here for calls!");
tracePredStateThroughCall(MI);
}
@@ -2109,21 +2374,10 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
DebugLoc Loc = MI.getDebugLoc();
auto InsertPt = MI.getIterator();
- if (FenceCallAndRet) {
- // Simply forcibly block speculation of loads out of the function by using
- // an LFENCE. This is potentially a heavy-weight mitigation strategy, but
- // should be secure, is simple from an ABI perspective, and the cost can be
- // minimized through inlining.
- //
- // FIXME: We should investigate ways to establish a strong data-dependency
- // on the return. However, poisoning the stack pointer is unlikely to work
- // because the return is *predicted* rather than relying on the load of the
- // return address to actually resolve.
- BuildMI(MBB, InsertPt, Loc, TII->get(X86::LFENCE));
- ++NumInstsInserted;
- ++NumLFENCEsInserted;
+ if (FenceCallAndRet)
+ // No need to fence here as we'll fence at the return site itself. That
+ // handles more cases than we can handle here.
return;
- }
// Take our predicate state, shift it to the high 17 bits (so that we keep
// pointers canonical) and merge it into RSP. This will allow the caller to
@@ -2141,31 +2395,168 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
///
/// For tail calls, this is all we need to do.
///
-/// For calls where we might return to control flow, we further need to extract
-/// the predicate state built up within that function from the high bits of the
-/// stack pointer, and make that the newly available predicate state.
+/// For calls where we might return and resume the control flow, we need to
+/// extract the predicate state from the high bits of the stack pointer after
+/// control returns from the called function.
+///
+/// We also need to verify that we intended to return to this location in the
+/// code. An attacker might arrange for the processor to mispredict the return
+/// to this valid but incorrect return address in the program rather than the
+/// correct one. See the paper on this attack, called "ret2spec" by the
+/// researchers, here:
+/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
+///
+/// The way we verify that we returned to the correct location is by preserving
+/// the expected return address across the call. One technique involves taking
+/// advantage of the red-zone to load the return address from `-8(%rsp)` where it
+/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
+/// directly save the address into a register that will be preserved across the
+/// call. We compare this intended return address against the address
+/// immediately following the call (the observed return address). If these
+/// mismatch, we have detected misspeculation and can poison our predicate
+/// state.
void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
auto InsertPt = MI.getIterator();
DebugLoc Loc = MI.getDebugLoc();
+ if (FenceCallAndRet) {
+ if (MI.isReturn())
+ // Tail call, we don't return to this function.
+ // FIXME: We should also handle noreturn calls.
+ return;
+
+ // We don't need to fence before the call because the function should fence
+ // in its entry. However, we do need to fence after the call returns.
+ // Fencing before the return doesn't correctly handle cases where the return
+ // itself is mispredicted.
+ BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
+ ++NumInstsInserted;
+ ++NumLFENCEsInserted;
+ return;
+ }
+
// First, we transfer the predicate state into the called function by merging
// it into the stack pointer. This will kill the current def of the state.
unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
// If this call is also a return, it is a tail call and we don't need anything
- // else to handle it so just continue.
- // FIXME: We should also handle noreturn calls.
- if (MI.isReturn())
+ // else to handle it so just return. Also, if there are no further
+ // instructions and no successors, this call does not return so we can also
+ // bail.
+ if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
return;
- // We need to step past the call and recover the predicate state from SP after
- // the return, and make this new state available.
+ // Create a symbol to track the return address and attach it to the call
+ // machine instruction. We will lower extra symbols attached to call
+ // instructions as labels immediately following the call.
+ MCSymbol *RetSymbol =
+ MF.getContext().createTempSymbol("slh_ret_addr",
+ /*AlwaysAddSuffix*/ true);
+ MI.setPostInstrSymbol(MF, RetSymbol);
+
+ const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
+ unsigned ExpectedRetAddrReg = 0;
+
+ // If we have no red zones or if the function returns twice (possibly without
+ // using the `ret` instruction) like setjmp, we need to save the expected
+ // return address prior to the call.
+ if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone) ||
+ MF.exposesReturnsTwice()) {
+ // If we don't have red zones, we need to compute the expected return
+ // address prior to the call and store it in a register that lives across
+ // the call.
+ //
+ // In some ways, this is doubly satisfying as a mitigation because it will
+ // also successfully detect stack smashing bugs in some cases (typically,
+ // when a callee-saved register is used and the callee doesn't push it onto
+ // the stack). But that isn't our primary goal, so we only use it as
+ // a fallback.
+ //
+ // FIXME: It isn't clear that this is reliable in the face of
+ // rematerialization in the register allocator. We somehow need to force
+ // that to not occur for this particular instruction, and instead to spill
+ // or otherwise preserve the value computed *prior* to the call.
+ //
+ // FIXME: It is even less clear why MachineCSE can't just fold this when we
+ // end up having to use identical instructions both before and after the
+ // call to feed the comparison.
+ ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
+ .addSym(RetSymbol);
+ } else {
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addSym(RetSymbol)
+ .addReg(/*Segment*/ 0);
+ }
+ }
+
+ // Step past the call so the rest of the hardening is inserted after it returns.
++InsertPt;
+
+ // If we didn't pre-compute the expected return address into a register, then
+ // red zones are enabled and the return address is still available on the
+ // stack immediately after the call. As the very first instruction, we load it
+ // into a register.
+ if (!ExpectedRetAddrReg) {
+ ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
+ .addReg(/*Base*/ X86::RSP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addImm(/*Displacement*/ -8) // The return address was just popped, so
+ // it now sits 8 bytes below the stack pointer.
+ .addReg(/*Segment*/ 0);
+ }
+
+ // Now we extract the callee's predicate state from the stack pointer.
unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
- PS->SSA.AddAvailableValue(&MBB, NewStateReg);
+
+ // Test the expected return address against our actual address. If we can
+ // form this basic block's address as an immediate, this is easy. Otherwise
+ // we compute it.
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // FIXME: Could we fold this with the load? It would require careful EFLAGS
+ // management.
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
+ .addReg(ExpectedRetAddrReg, RegState::Kill)
+ .addSym(RetSymbol);
+ } else {
+ unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addSym(RetSymbol)
+ .addReg(/*Segment*/ 0);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
+ .addReg(ExpectedRetAddrReg, RegState::Kill)
+ .addReg(ActualRetAddrReg, RegState::Kill);
+ }
+
+ // Now conditionally update the predicate state we just extracted if we ended
+ // up at a different return address than expected.
+ int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+ auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+
+ unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
+ .addReg(NewStateReg, RegState::Kill)
+ .addReg(PS->PoisonReg);
+ CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+ ++NumInstsInserted;
+ LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+
+ PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
}
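The sequence built above amounts to a simple runtime check: after the call, the actual return address is compared against the address of the label placed immediately after the call, and the freshly extracted predicate state is poisoned on a mismatch. A minimal standalone sketch of that check in plain C++ (hypothetical names, not part of the patch or of the MachineInstr-level implementation):

#include <cstdint>

// Illustrative model only: State is the speculation predicate extracted from
// SP, ExpectedRetAddr is the address of the post-call label, ActualRetAddr is
// where execution really resumed.
static uint64_t hardenPredStateAfterCall(uint64_t State,
                                         uint64_t ExpectedRetAddr,
                                         uint64_t ActualRetAddr) {
  const uint64_t Poison = ~uint64_t(0); // all-ones poison value
  // CMP + CMOVNE: keep the extracted state only when the return address
  // matches; otherwise force the poison value so later hardened loads cannot
  // leak data along a mispredicted return path.
  return ExpectedRetAddr == ActualRetAddr ? State : Poison;
}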
/// An attacker may speculatively store over a value that is then speculatively
@@ -2237,9 +2628,9 @@ void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
++NumCallsOrJumpsHardened;
}
-INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
"X86 speculative load hardener", false, false)
-INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
+INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
"X86 speculative load hardener", false, false)
FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 7e84323dda4c..0c9ce8802e1b 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -77,6 +77,8 @@ X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
if (isTargetELF()) {
switch (TM.getCodeModel()) {
// 64-bit small code model is simple: All rip-relative.
+ case CodeModel::Tiny:
+ llvm_unreachable("Tiny codesize model not supported on X86");
case CodeModel::Small:
case CodeModel::Kernel:
return X86II::MO_NO_FLAG;
@@ -139,8 +141,11 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
if (TM.shouldAssumeDSOLocal(M, GV))
return classifyLocalReference(GV);
- if (isTargetCOFF())
- return X86II::MO_DLLIMPORT;
+ if (isTargetCOFF()) {
+ if (GV->hasDLLImportStorageClass())
+ return X86II::MO_DLLIMPORT;
+ return X86II::MO_COFFSTUB;
+ }
if (is64Bit()) {
// ELF supports a large, truly PIC code model with non-PC relative GOT
@@ -220,14 +225,22 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUName.empty())
CPUName = "generic";
- // Make sure 64-bit features are available in 64-bit mode. (But make sure
- // SSE2 can be turned off explicitly.)
std::string FullFS = FS;
if (In64BitMode) {
+ // SSE2 defaults to enabled in 64-bit mode, but it can still be turned off
+ // explicitly.
if (!FullFS.empty())
- FullFS = "+64bit,+sse2," + FullFS;
+ FullFS = "+sse2," + FullFS;
else
- FullFS = "+64bit,+sse2";
+ FullFS = "+sse2";
+
+ // If no CPU was specified, enable the 64bit feature to satisfy the later check.
+ if (CPUName == "generic") {
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
+ }
}
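As a rough sketch of how this hunk assembles the feature string (the helper name and shape are illustrative, not the actual LLVM API):

#include <string>

// Illustrative only: SSE2 is always prepended in 64-bit mode, while "+64bit"
// is prepended only when no explicit CPU was named, so the later 64-bit
// support check still fires for CPUs that genuinely lack x86-64.
static std::string buildFullFS(std::string FS, bool In64BitMode,
                               const std::string &CPUName) {
  if (In64BitMode) {
    FS = FS.empty() ? "+sse2" : "+sse2," + FS;
    if (CPUName == "generic")
      FS = "+64bit," + FS; // FS is non-empty at this point
  }
  return FS;
}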
// LAHF/SAHF are always supported in non-64-bit mode.
@@ -262,8 +275,9 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel << ", 64bit "
<< HasX86_64 << "\n");
- assert((!In64BitMode || HasX86_64) &&
- "64-bit code requested on a subtarget that doesn't support it!");
+ if (In64BitMode && !HasX86_64)
+ report_fatal_error("64-bit code requested on a subtarget that doesn't "
+ "support it!");
// Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index 85e8256a6e94..b1103f823e7f 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -52,21 +52,15 @@ enum Style {
class X86Subtarget final : public X86GenSubtargetInfo {
public:
+ // NOTE: Do not add anything new to this list. Coarse, CPU name based flags
+ // are not a good idea. We should be migrating away from these.
enum X86ProcFamilyEnum {
Others,
IntelAtom,
IntelSLM,
IntelGLM,
IntelGLP,
- IntelTRM,
- IntelHaswell,
- IntelBroadwell,
- IntelSkylake,
- IntelKNL,
- IntelSKX,
- IntelCannonlake,
- IntelIcelakeClient,
- IntelIcelakeServer,
+ IntelTRM
};
protected:
@@ -229,6 +223,9 @@ protected:
// PMULUDQ.
bool IsPMULLDSlow = false;
+ /// True if the PMADDWD instruction is slow compared to PMULLD.
+ bool IsPMADDWDSlow = false;
+
/// True if unaligned memory accesses of 16-bytes are slow.
bool IsUAMem16Slow = false;
@@ -385,9 +382,23 @@ protected:
/// Processor supports PCONFIG instruction
bool HasPCONFIG = false;
+ /// Processor has a single uop BEXTR implementation.
+ bool HasFastBEXTR = false;
+
+ /// Try harder to combine to horizontal vector ops if they are fast.
+ bool HasFastHorizontalOps = false;
+
/// Use a retpoline thunk rather than indirect calls to block speculative
/// execution.
- bool UseRetpoline = false;
+ bool UseRetpolineIndirectCalls = false;
+
+ /// Use a retpoline thunk or remove any indirect branch to block speculative
+ /// execution.
+ bool UseRetpolineIndirectBranches = false;
+
+ /// Deprecated flag, query `UseRetpolineIndirectCalls` and
+ /// `UseRetpolineIndirectBranches` instead.
+ bool DeprecatedUseRetpoline = false;
/// When using a retpoline thunk, call an externally provided thunk rather
/// than emitting one inside the compiler.
@@ -408,6 +419,9 @@ protected:
/// Indicates target prefers 256 bit instructions.
bool Prefer256Bit = false;
+ /// Three-way conditional branches are profitable on this subtarget.
+ bool ThreewayBranchProfitable = false;
+
/// What processor and OS we're targeting.
Triple TargetTriple;
@@ -534,7 +548,9 @@ public:
bool hasX87() const { return HasX87; }
bool hasNOPL() const { return HasNOPL; }
- bool hasCMov() const { return HasCMov; }
+ // SSE codegen depends on cmovs, and all SSE1+ processors support them.
+ // All 64-bit processors support cmov.
+ bool hasCMov() const { return HasCMov || X86SSELevel >= SSE1 || is64Bit(); }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }
@@ -599,6 +615,7 @@ public:
bool hasPTWRITE() const { return HasPTWRITE; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isPMULLDSlow() const { return IsPMULLDSlow; }
+ bool isPMADDWDSlow() const { return IsPMADDWDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
int getGatherOverhead() const { return GatherOverhead; }
@@ -619,6 +636,8 @@ public:
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+ bool hasFastBEXTR() const { return HasFastBEXTR; }
+ bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
bool hasMacroFusion() const { return HasMacroFusion; }
bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
@@ -648,8 +667,12 @@ public:
bool hasWAITPKG() const { return HasWAITPKG; }
bool hasPCONFIG() const { return HasPCONFIG; }
bool hasSGX() const { return HasSGX; }
+ bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
bool hasINVPCID() const { return HasINVPCID; }
- bool useRetpoline() const { return UseRetpoline; }
+ bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
+ bool useRetpolineIndirectBranches() const {
+ return UseRetpolineIndirectBranches;
+ }
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
@@ -804,7 +827,9 @@ public:
/// If we are using retpolines, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
- bool enableIndirectBrExpand() const override { return useRetpoline(); }
+ bool enableIndirectBrExpand() const override {
+ return useRetpolineIndirectBranches();
+ }
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index 374bf3daaf9b..afcb49dc2263 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -54,23 +54,10 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
-static cl::opt<bool> EnableSpeculativeLoadHardening(
- "x86-speculative-load-hardening",
- cl::desc("Enable speculative load hardening"), cl::init(false), cl::Hidden);
-
-namespace llvm {
-
-void initializeWinEHStatePassPass(PassRegistry &);
-void initializeFixupLEAPassPass(PassRegistry &);
-void initializeShadowCallStackPass(PassRegistry &);
-void initializeX86CallFrameOptimizationPass(PassRegistry &);
-void initializeX86CmovConverterPassPass(PassRegistry &);
-void initializeX86ExecutionDomainFixPass(PassRegistry &);
-void initializeX86DomainReassignmentPass(PassRegistry &);
-void initializeX86AvoidSFBPassPass(PassRegistry &);
-void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
-
-} // end namespace llvm
+static cl::opt<bool> EnableCondBrFoldingPass("x86-condbr-folding",
+ cl::desc("Enable the conditional branch "
+ "folding pass"),
+ cl::init(false), cl::Hidden);
extern "C" void LLVMInitializeX86Target() {
// Register the target.
@@ -89,7 +76,9 @@ extern "C" void LLVMInitializeX86Target() {
initializeX86ExecutionDomainFixPass(PR);
initializeX86DomainReassignmentPass(PR);
initializeX86AvoidSFBPassPass(PR);
+ initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
+ initializeX86CondBrFoldingPassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -201,10 +190,13 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- bool JIT, bool Is64Bit) {
- if (CM)
+static CodeModel::Model getEffectiveX86CodeModel(Optional<CodeModel::Model> CM,
+ bool JIT, bool Is64Bit) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
return *CM;
+ }
if (JIT)
return Is64Bit ? CodeModel::Large : CodeModel::Small;
return CodeModel::Small;
@@ -221,7 +213,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, JIT, RM),
- getEffectiveCodeModel(CM, JIT, TT.getArch() == Triple::x86_64), OL),
+ getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64),
+ OL),
TLOF(createTLOF(getTargetTriple())) {
// Windows stack unwinder gets confused when execution flow "falls through"
// after a call to 'noreturn' function.
@@ -292,13 +285,14 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
}
}
- // Extract required-vector-width attribute.
+ // Extract min-legal-vector-width attribute.
unsigned RequiredVectorWidth = UINT32_MAX;
- if (F.hasFnAttribute("required-vector-width")) {
- StringRef Val = F.getFnAttribute("required-vector-width").getValueAsString();
+ if (F.hasFnAttribute("min-legal-vector-width")) {
+ StringRef Val =
+ F.getFnAttribute("min-legal-vector-width").getValueAsString();
unsigned Width;
if (!Val.getAsInteger(0, Width)) {
- Key += ",required-vector-width=";
+ Key += ",min-legal-vector-width=";
Key += Val;
RequiredVectorWidth = Width;
}
@@ -449,6 +443,8 @@ bool X86PassConfig::addGlobalInstructionSelect() {
}
bool X86PassConfig::addILPOpts() {
+ if (EnableCondBrFoldingPass)
+ addPass(createX86CondBrFolding());
addPass(&EarlyIfConverterID);
if (EnableMachineCombinerPass)
addPass(&MachineCombinerID);
@@ -473,9 +469,7 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86AvoidStoreForwardingBlocks());
}
- if (EnableSpeculativeLoadHardening)
- addPass(createX86SpeculativeLoadHardeningPass());
-
+ addPass(createX86SpeculativeLoadHardeningPass());
addPass(createX86FlagsCopyLoweringPass());
addPass(createX86WinAllocaExpander());
}
@@ -508,6 +502,8 @@ void X86PassConfig::addPreEmitPass() {
addPass(createX86FixupLEAs());
addPass(createX86EvexToVexInsts());
}
+ addPass(createX86DiscriminateMemOpsPass());
+ addPass(createX86InsertPrefetchPass());
}
void X86PassConfig::addPreEmitPass2() {
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
index 5b21cd82b5b1..f5b45da0c3dc 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -53,10 +53,6 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
-
- bool isMachineVerifierClean() const override {
- return false;
- }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 865462622627..36929a4f5439 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -290,11 +290,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.
{ ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb.
-
- { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
- { ISD::SREM, MVT::v32i16, 8 }, // vpmulhw+mul+sub sequence
- { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
- { ISD::UREM, MVT::v32i16, 8 }, // vpmulhuw+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -308,11 +303,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v2i64, 1 },
{ ISD::SRA, MVT::v4i64, 1 },
{ ISD::SRA, MVT::v8i64, 1 },
-
- { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
- { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
- { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
- { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -328,15 +318,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
-
- { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
- { ISD::SREM, MVT::v16i16, 8 }, // vpmulhw+mul+sub sequence
- { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
- { ISD::UREM, MVT::v16i16, 8 }, // vpmulhuw+mul+sub sequence
- { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
- { ISD::SREM, MVT::v8i32, 19 }, // vpmuldq+mul+sub sequence
- { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
- { ISD::UREM, MVT::v8i32, 19 }, // vpmuludq+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -354,7 +335,81 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split.
{ ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split.
{ ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split.
+ };
+
+ // XOP has faster vXi8 shifts.
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasSSE2() && !ST->hasXOP()) {
+ if (const auto *Entry =
+ CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512BWConstCostTable[] = {
+ { ISD::SDIV, MVT::v64i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::SREM, MVT::v64i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v64i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::UREM, MVT::v64i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
+ { ISD::SREM, MVT::v32i16, 8 }, // vpmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
+ { ISD::UREM, MVT::v32i16, 8 }, // vpmulhuw+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasBWI()) {
+ if (const auto *Entry =
+ CostTableLookup(AVX512BWConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512ConstCostTable[] = {
+ { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
+ { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
+ { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
+ { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasAVX512()) {
+ if (const auto *Entry =
+ CostTableLookup(AVX512ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+ static const CostTblEntry AVX2ConstCostTable[] = {
+ { ISD::SDIV, MVT::v32i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::SREM, MVT::v32i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::UREM, MVT::v32i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
+ { ISD::SREM, MVT::v16i16, 8 }, // vpmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
+ { ISD::UREM, MVT::v16i16, 8 }, // vpmulhuw+mul+sub sequence
+ { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
+ { ISD::SREM, MVT::v8i32, 19 }, // vpmuldq+mul+sub sequence
+ { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
+ { ISD::UREM, MVT::v8i32, 19 }, // vpmuludq+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasAVX2()) {
+ if (const auto *Entry = CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry SSE2ConstCostTable[] = {
+ { ISD::SDIV, MVT::v32i8, 28+2 }, // 4*ext+4*pmulhw sequence + split.
+ { ISD::SREM, MVT::v32i8, 32+2 }, // 4*ext+4*pmulhw+mul+sub sequence + split.
+ { ISD::SDIV, MVT::v16i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::SREM, MVT::v16i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i8, 28+2 }, // 4*ext+4*pmulhw sequence + split.
+ { ISD::UREM, MVT::v32i8, 32+2 }, // 4*ext+4*pmulhw+mul+sub sequence + split.
+ { ISD::UDIV, MVT::v16i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::UREM, MVT::v16i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
{ ISD::SDIV, MVT::v16i16, 12+2 }, // 2*pmulhw sequence + split.
{ ISD::SREM, MVT::v16i16, 16+2 }, // 2*pmulhw+mul+sub sequence + split.
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
@@ -373,7 +428,8 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::UREM, MVT::v4i32, 20 }, // pmuludq+mul+sub sequence
};
- if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
ST->hasSSE2()) {
// pmuldq sequence.
if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX())
@@ -385,12 +441,8 @@ int X86TTIImpl::getArithmeticInstrCost(
if (ISD == ISD::SREM && LT.second == MVT::v4i32 && ST->hasSSE41())
return LT.first * 20;
- // XOP has faster vXi8 shifts.
- if ((ISD != ISD::SHL && ISD != ISD::SRL && ISD != ISD::SRA) ||
- !ST->hasXOP())
- if (const auto *Entry =
- CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
- return LT.first * Entry->Cost;
+ if (const auto *Entry = CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
}
static const CostTblEntry AVX2UniformCostTable[] = {
@@ -560,9 +612,18 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for XOP lowering tricks.
- if (ST->hasXOP())
- if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
+ if (ST->hasXOP()) {
+ // If the right shift is constant then we'll fold the negation so
+ // it's as cheap as a left shift.
+ int ShiftISD = ISD;
+ if ((ShiftISD == ISD::SRL || ShiftISD == ISD::SRA) &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
+ ShiftISD = ISD::SHL;
+ if (const auto *Entry =
+ CostTableLookup(XOPShiftCostTable, ShiftISD, LT.second))
return LT.first * Entry->Cost;
+ }
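The remapping above can be stated on its own: constant right shifts are costed as left shifts because XOP folds the negated shift amount into the constant. A hedged sketch with assumed names:

enum ShiftKind { Shl, Srl, Sra }; // stand-ins for the ISD shift opcodes

// Illustrative helper: for XOP cost lookups, a right shift by a constant is
// treated as a left shift, since the negation folds away and the same
// instruction is used.
static ShiftKind costedXopShift(ShiftKind K, bool ConstantAmount) {
  if (ConstantAmount && (K == Srl || K == Sra))
    return Shl;
  return K;
}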
static const CostTblEntry SSE2UniformShiftCostTable[] = {
// Uniform splats are cheaper for the following instructions.
@@ -771,6 +832,12 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
{ ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/
{ ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/
+
+ { ISD::FADD, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
+ { ISD::FADD, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
+
+ { ISD::FSUB, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
+ { ISD::FSUB, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
};
if (ST->hasSSE2())
@@ -780,6 +847,20 @@ int X86TTIImpl::getArithmeticInstrCost(
static const CostTblEntry SSE1CostTable[] = {
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
+
+ { ISD::FADD, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::FADD, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
+
+ { ISD::FSUB, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::FSUB, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
+
+ { ISD::ADD, MVT::i8, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::ADD, MVT::i16, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::ADD, MVT::i32, 1 }, // Pentium III from http://www.agner.org/
+
+ { ISD::SUB, MVT::i8, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::SUB, MVT::i16, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::SUB, MVT::i32, 1 }, // Pentium III from http://www.agner.org/
};
if (ST->hasSSE1())
@@ -810,12 +891,30 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ // Treat Transpose as 2-op shuffles - there's no difference in lowering.
+ if (Kind == TTI::SK_Transpose)
+ Kind = TTI::SK_PermuteTwoSrc;
+
// For Broadcasts we are splatting the first element from the first input
// register, so only need to reference that input and all the output
// registers are the same.
if (Kind == TTI::SK_Broadcast)
LT.first = 1;
+ // Subvector extractions are free if they start at the beginning of a
+ // vector and cheap if the subvectors are aligned.
+ if (Kind == TTI::SK_ExtractSubvector && LT.second.isVector()) {
+ int NumElts = LT.second.getVectorNumElements();
+ if ((Index % NumElts) == 0)
+ return 0;
+ std::pair<int, MVT> SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
+ if (SubLT.second.isVector()) {
+ int NumSubElts = SubLT.second.getVectorNumElements();
+ if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
+ return SubLT.first;
+ }
+ }
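The rule encoded above, stated as a small standalone function (names assumed for illustration, not the real API):

// Illustrative only: cost of extracting a subvector of NumSubElts elements at
// element Index from a legalized vector of NumElts elements, where SubSplits
// is the number of registers the subvector type legalizes into.
static int extractSubvectorCostSketch(int Index, int NumElts, int NumSubElts,
                                      int SubSplits) {
  if (NumElts > 0 && (Index % NumElts) == 0)
    return 0; // starts at a register boundary: free
  if (NumSubElts > 0 && (Index % NumSubElts) == 0 &&
      (NumElts % NumSubElts) == 0)
    return SubSplits; // aligned subvector: pay only the split cost
  return -1;          // otherwise fall through to the shuffle tables below
}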
+
// We are going to permute multiple sources and the result will be in multiple
// destinations. Providing an accurate cost only for splits where the element
// type remains the same.
@@ -853,15 +952,15 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
}
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
- { TTI::SK_Reverse, MVT::v32i8, 1 }, // vpermb
+ {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
+ {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
- { TTI::SK_PermuteSingleSrc, MVT::v64i8, 1 }, // vpermb
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 1 }, // vpermb
+ {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb
- { TTI::SK_PermuteTwoSrc, MVT::v64i8, 1 }, // vpermt2b
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 1 }, // vpermt2b
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 } // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v64i8, 1}, // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 1}, // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1} // vpermt2b
};
if (ST->hasVBMI())
@@ -870,25 +969,25 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v32i16, 1 }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v64i8, 1 }, // vpbroadcastb
+ {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb
- { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v64i8, 2 }, // pshufb + vshufi64x2
+ {TTI::SK_Reverse, MVT::v32i16, 1}, // vpermw
+ {TTI::SK_Reverse, MVT::v16i16, 1}, // vpermw
+ {TTI::SK_Reverse, MVT::v64i8, 2}, // pshufb + vshufi64x2
- { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v64i8, 8 }, // extend to v32i16
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 3 }, // vpermw + zext/trunc
+ {TTI::SK_PermuteSingleSrc, MVT::v32i16, 1}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 3}, // vpermw + zext/trunc
- { TTI::SK_PermuteTwoSrc, MVT::v32i16, 1 }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 1 }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 3 }, // zext + vpermt2w + trunc
- { TTI::SK_PermuteTwoSrc, MVT::v64i8, 19 }, // 6 * v32i8 + 1
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 } // zext + vpermt2w + trunc
+ {TTI::SK_PermuteTwoSrc, MVT::v32i16, 1}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 1}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 3}, // zext + vpermt2w + trunc
+ {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3} // zext + vpermt2w + trunc
};
if (ST->hasBWI())
@@ -897,42 +996,42 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX512ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v8f64, 1 }, // vbroadcastpd
- { TTI::SK_Broadcast, MVT::v16f32, 1 }, // vbroadcastps
- { TTI::SK_Broadcast, MVT::v8i64, 1 }, // vpbroadcastq
- { TTI::SK_Broadcast, MVT::v16i32, 1 }, // vpbroadcastd
-
- { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
-
- { TTI::SK_PermuteSingleSrc, MVT::v8f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v16f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v8i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v16i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
-
- { TTI::SK_PermuteTwoSrc, MVT::v8f64, 1 }, // vpermt2pd
- { TTI::SK_PermuteTwoSrc, MVT::v16f32, 1 }, // vpermt2ps
- { TTI::SK_PermuteTwoSrc, MVT::v8i64, 1 }, // vpermt2q
- { TTI::SK_PermuteTwoSrc, MVT::v16i32, 1 }, // vpermt2d
- { TTI::SK_PermuteTwoSrc, MVT::v4f64, 1 }, // vpermt2pd
- { TTI::SK_PermuteTwoSrc, MVT::v8f32, 1 }, // vpermt2ps
- { TTI::SK_PermuteTwoSrc, MVT::v4i64, 1 }, // vpermt2q
- { TTI::SK_PermuteTwoSrc, MVT::v8i32, 1 }, // vpermt2d
- { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // vpermt2pd
- { TTI::SK_PermuteTwoSrc, MVT::v4f32, 1 }, // vpermt2ps
- { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // vpermt2q
- { TTI::SK_PermuteTwoSrc, MVT::v4i32, 1 } // vpermt2d
+ {TTI::SK_Broadcast, MVT::v8f64, 1}, // vbroadcastpd
+ {TTI::SK_Broadcast, MVT::v16f32, 1}, // vbroadcastps
+ {TTI::SK_Broadcast, MVT::v8i64, 1}, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v16i32, 1}, // vpbroadcastd
+
+ {TTI::SK_Reverse, MVT::v8f64, 1}, // vpermpd
+ {TTI::SK_Reverse, MVT::v16f32, 1}, // vpermps
+ {TTI::SK_Reverse, MVT::v8i64, 1}, // vpermq
+ {TTI::SK_Reverse, MVT::v16i32, 1}, // vpermd
+
+ {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v16f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v4f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v8i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v16i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
+
+ {TTI::SK_PermuteTwoSrc, MVT::v8f64, 1}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v16f32, 1}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v8i64, 1}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v16i32, 1}, // vpermt2d
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, 1}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, 1}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, 1}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, 1}, // vpermt2d
+ {TTI::SK_PermuteTwoSrc, MVT::v2f64, 1}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v4f32, 1}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v2i64, 1}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v4i32, 1} // vpermt2d
};
if (ST->hasAVX512())
@@ -940,40 +1039,40 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX2ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f64, 1 }, // vbroadcastpd
- { TTI::SK_Broadcast, MVT::v8f32, 1 }, // vbroadcastps
- { TTI::SK_Broadcast, MVT::v4i64, 1 }, // vpbroadcastq
- { TTI::SK_Broadcast, MVT::v8i32, 1 }, // vpbroadcastd
- { TTI::SK_Broadcast, MVT::v16i16, 1 }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32i8, 1 }, // vpbroadcastb
-
- { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
-
- { TTI::SK_Select, MVT::v16i16, 1 }, // vpblendvb
- { TTI::SK_Select, MVT::v32i8, 1 }, // vpblendvb
-
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2*vpshufb
+ {TTI::SK_Broadcast, MVT::v4f64, 1}, // vbroadcastpd
+ {TTI::SK_Broadcast, MVT::v8f32, 1}, // vbroadcastps
+ {TTI::SK_Broadcast, MVT::v4i64, 1}, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v8i32, 1}, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32i8, 1}, // vpbroadcastb
+
+ {TTI::SK_Reverse, MVT::v4f64, 1}, // vpermpd
+ {TTI::SK_Reverse, MVT::v8f32, 1}, // vpermps
+ {TTI::SK_Reverse, MVT::v4i64, 1}, // vpermq
+ {TTI::SK_Reverse, MVT::v8i32, 1}, // vpermd
+ {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb
+ {TTI::SK_Reverse, MVT::v32i8, 2}, // vperm2i128 + pshufb
+
+ {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb
+ {TTI::SK_Select, MVT::v32i8, 1}, // vpblendvb
+
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 + 2*vpshufb
// + vpblendvb
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vperm2i128 + 2*vpshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vperm2i128 + 2*vpshufb
// + vpblendvb
- { TTI::SK_PermuteTwoSrc, MVT::v4f64, 3 }, // 2*vpermpd + vblendpd
- { TTI::SK_PermuteTwoSrc, MVT::v8f32, 3 }, // 2*vpermps + vblendps
- { TTI::SK_PermuteTwoSrc, MVT::v4i64, 3 }, // 2*vpermq + vpblendd
- { TTI::SK_PermuteTwoSrc, MVT::v8i32, 3 }, // 2*vpermd + vpblendd
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 7 }, // 2*vperm2i128 + 4*vpshufb
- // + vpblendvb
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 7 }, // 2*vperm2i128 + 4*vpshufb
- // + vpblendvb
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vpermpd + vblendpd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3}, // 2*vpermps + vblendps
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vpermq + vpblendd
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3}, // 2*vpermd + vpblendd
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb
+ // + vpblendvb
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7}, // 2*vperm2i128 + 4*vpshufb
+ // + vpblendvb
};
if (ST->hasAVX2())
@@ -981,21 +1080,21 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry XOPShuffleTbl[] = {
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 + vpermil2pd
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 2 }, // vperm2f128 + vpermil2ps
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 + vpermil2pd
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 2 }, // vperm2f128 + vpermil2ps
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vextractf128 + 2*vpperm
- // + vinsertf128
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vextractf128 + 2*vpperm
- // + vinsertf128
-
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 9 }, // 2*vextractf128 + 6*vpperm
- // + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpperm
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 9 }, // 2*vextractf128 + 6*vpperm
- // + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 }, // vpperm
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vpermil2pd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2}, // vperm2f128 + vpermil2ps
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vpermil2pd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2}, // vperm2f128 + vpermil2ps
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 + 2*vpperm
+ // + vinsertf128
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vextractf128 + 2*vpperm
+ // + vinsertf128
+
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpperm
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9}, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1}, // vpperm
};
if (ST->hasXOP())
@@ -1003,46 +1102,46 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX1ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Broadcast, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Broadcast, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Broadcast, MVT::v16i16, 3 }, // vpshuflw + vpshufd + vinsertf128
- { TTI::SK_Broadcast, MVT::v32i8, 2 }, // vpshufb + vinsertf128
-
- { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
- { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
-
- { TTI::SK_Select, MVT::v4i64, 1 }, // vblendpd
- { TTI::SK_Select, MVT::v4f64, 1 }, // vblendpd
- { TTI::SK_Select, MVT::v8i32, 1 }, // vblendps
- { TTI::SK_Select, MVT::v8f32, 1 }, // vblendps
- { TTI::SK_Select, MVT::v16i16, 3 }, // vpand + vpandn + vpor
- { TTI::SK_Select, MVT::v32i8, 3 }, // vpand + vpandn + vpor
-
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 + vshufpd
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 + vshufpd
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 8 }, // vextractf128 + 4*pshufb
+ {TTI::SK_Broadcast, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8f32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8i32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v32i8, 2}, // vpshufb + vinsertf128
+
+ {TTI::SK_Reverse, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8f32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8i32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ {TTI::SK_Reverse, MVT::v32i8, 4}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+
+ {TTI::SK_Select, MVT::v4i64, 1}, // vblendpd
+ {TTI::SK_Select, MVT::v4f64, 1}, // vblendpd
+ {TTI::SK_Select, MVT::v8i32, 1}, // vblendps
+ {TTI::SK_Select, MVT::v8f32, 1}, // vblendps
+ {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor
+ {TTI::SK_Select, MVT::v32i8, 3}, // vpand + vpandn + vpor
+
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vshufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vshufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 + 4*pshufb
// + 2*por + vinsertf128
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 8 }, // vextractf128 + 4*pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, // vextractf128 + 4*pshufb
// + 2*por + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v4f64, 3 }, // 2*vperm2f128 + vshufpd
- { TTI::SK_PermuteTwoSrc, MVT::v4i64, 3 }, // 2*vperm2f128 + vshufpd
- { TTI::SK_PermuteTwoSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteTwoSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 15 }, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 15 }, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vperm2f128 + vshufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vperm2f128 + vshufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
};
if (ST->hasAVX())
@@ -1050,12 +1149,12 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSE41ShuffleTbl[] = {
- { TTI::SK_Select, MVT::v2i64, 1 }, // pblendw
- { TTI::SK_Select, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Select, MVT::v4i32, 1 }, // pblendw
- { TTI::SK_Select, MVT::v4f32, 1 }, // blendps
- { TTI::SK_Select, MVT::v8i16, 1 }, // pblendw
- { TTI::SK_Select, MVT::v16i8, 1 } // pblendvb
+ {TTI::SK_Select, MVT::v2i64, 1}, // pblendw
+ {TTI::SK_Select, MVT::v2f64, 1}, // movsd
+ {TTI::SK_Select, MVT::v4i32, 1}, // pblendw
+ {TTI::SK_Select, MVT::v4f32, 1}, // blendps
+ {TTI::SK_Select, MVT::v8i16, 1}, // pblendw
+ {TTI::SK_Select, MVT::v16i8, 1} // pblendvb
};
if (ST->hasSSE41())
@@ -1063,20 +1162,20 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSSE3ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_Broadcast, MVT::v16i8, 1 }, // pshufb
+ {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb
- { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+ {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb
- { TTI::SK_Select, MVT::v8i16, 3 }, // 2*pshufb + por
- { TTI::SK_Select, MVT::v16i8, 3 }, // 2*pshufb + por
+ {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por
+ {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por
- { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, 3 }, // 2*pshufb + por
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 }, // 2*pshufb + por
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por
};
if (ST->hasSSSE3())
@@ -1084,29 +1183,29 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSE2ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_Broadcast, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_Broadcast, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd
- { TTI::SK_Broadcast, MVT::v16i8, 3 }, // unpck + pshuflw + pshufd
-
- { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
- { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + packus
-
- { TTI::SK_Select, MVT::v2i64, 1 }, // movsd
- { TTI::SK_Select, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Select, MVT::v4i32, 2 }, // 2*shufps
- { TTI::SK_Select, MVT::v8i16, 3 }, // pand + pandn + por
- { TTI::SK_Select, MVT::v16i8, 3 }, // pand + pandn + por
-
- { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_PermuteSingleSrc, MVT::v8i16, 5 }, // 2*pshuflw + 2*pshufhw
+ {TTI::SK_Broadcast, MVT::v2f64, 1}, // shufpd
+ {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd
+ {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd
+ {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd
+
+ {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd
+ {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd
+ {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd
+ {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd
+ {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
+
+ {TTI::SK_Select, MVT::v2i64, 1}, // movsd
+ {TTI::SK_Select, MVT::v2f64, 1}, // movsd
+ {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps
+ {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por
+ {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por
+
+ {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // pshufd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw
// + pshufd/unpck
{ TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + 2*packus
@@ -1145,6 +1244,27 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// FIXME: Need a better design of the cost table to handle non-simple types of
// potential massive combinations (elem_num x src_type x dst_type).
+ static const TypeConversionCostTblEntry AVX512BWConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
+
+ // Mask sign extend has an instruction.
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
+
+ // Mask zero extend is a load + broadcast.
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
+ };
+
static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
@@ -1208,8 +1328,6 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
@@ -1231,12 +1349,16 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
+
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 },
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 },
@@ -1328,13 +1450,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 6 },
// The generic code to compute the scalar overhead is currently broken.
// Work around this limitation by estimating the scalarization overhead
// here. We have roughly 10 instructions per scalar element.
// Multiply that by the vector width.
// FIXME: remove that when PR19268 is fixed.
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 10 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 20 },
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
@@ -1387,6 +1509,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
};
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
@@ -1408,11 +1531,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
+
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
@@ -1465,43 +1590,51 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (!SrcTy.isSimple() || !DstTy.isSimple())
return BaseT::getCastInstrCost(Opcode, Dst, Src);
- if (ST->hasDQI())
- if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
- return Entry->Cost;
+ MVT SimpleSrcTy = SrcTy.getSimpleVT();
+ MVT SimpleDstTy = DstTy.getSimpleVT();
- if (ST->hasAVX512())
- if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
- return Entry->Cost;
+ // Make sure that neither type is going to be split before using the
+ // AVX512 tables. This handles -mprefer-vector-width=256
+ // with -min-legal-vector-width<=256
+ if (TLI->getTypeAction(SimpleSrcTy) != TargetLowering::TypeSplitVector &&
+ TLI->getTypeAction(SimpleDstTy) != TargetLowering::TypeSplitVector) {
+ if (ST->hasBWI())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512BWConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+
+ if (ST->hasDQI())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+
+ if (ST->hasAVX512())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+ }
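The guard just introduced is simply: consult the AVX-512 conversion tables only when neither type legalizes by splitting. A minimal sketch (assumed helper, not the real API):

// Illustrative predicate mirroring the check above: when either type will be
// split during legalization (e.g. under -mprefer-vector-width=256 with
// min-legal-vector-width <= 256), the AVX-512 tables describe instructions
// that will not actually be used, so the AVX2/AVX/SSE tables apply instead.
static bool mayUseAVX512ConversionTables(bool SrcWillSplit, bool DstWillSplit) {
  return !SrcWillSplit && !DstWillSplit;
}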
if (ST->hasAVX2()) {
if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
if (ST->hasAVX()) {
if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
if (ST->hasSSE41()) {
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
if (ST->hasSSE2()) {
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
@@ -1629,6 +1762,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v16i32, 14 },
{ ISD::CTTZ, MVT::v32i16, 12 },
{ ISD::CTTZ, MVT::v64i8, 9 },
+ { ISD::SADDSAT, MVT::v32i16, 1 },
+ { ISD::SADDSAT, MVT::v64i8, 1 },
+ { ISD::SSUBSAT, MVT::v32i16, 1 },
+ { ISD::SSUBSAT, MVT::v64i8, 1 },
+ { ISD::UADDSAT, MVT::v32i16, 1 },
+ { ISD::UADDSAT, MVT::v64i8, 1 },
+ { ISD::USUBSAT, MVT::v32i16, 1 },
+ { ISD::USUBSAT, MVT::v64i8, 1 },
};
static const CostTblEntry AVX512CostTbl[] = {
{ ISD::BITREVERSE, MVT::v8i64, 36 },
@@ -1639,6 +1780,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTPOP, MVT::v16i32, 24 },
{ ISD::CTTZ, MVT::v8i64, 20 },
{ ISD::CTTZ, MVT::v16i32, 28 },
+ { ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
+ { ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
+ { ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
+ { ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq
};
static const CostTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 4 },
@@ -1674,6 +1819,15 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v8i32, 14 },
{ ISD::CTTZ, MVT::v16i16, 12 },
{ ISD::CTTZ, MVT::v32i8, 9 },
+ { ISD::SADDSAT, MVT::v16i16, 1 },
+ { ISD::SADDSAT, MVT::v32i8, 1 },
+ { ISD::SSUBSAT, MVT::v16i16, 1 },
+ { ISD::SSUBSAT, MVT::v32i8, 1 },
+ { ISD::UADDSAT, MVT::v16i16, 1 },
+ { ISD::UADDSAT, MVT::v32i8, 1 },
+ { ISD::USUBSAT, MVT::v16i16, 1 },
+ { ISD::USUBSAT, MVT::v32i8, 1 },
+ { ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd
{ ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
@@ -1701,6 +1855,15 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v8i32, 30 }, // 2 x 128-bit Op + extract/insert
{ ISD::CTTZ, MVT::v16i16, 26 }, // 2 x 128-bit Op + extract/insert
{ ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SSUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
{ ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
@@ -1721,6 +1884,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd
};
static const CostTblEntry SSE42CostTbl[] = {
+ { ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd
{ ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};
@@ -1765,6 +1929,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v4i32, 18 },
{ ISD::CTTZ, MVT::v8i16, 16 },
{ ISD::CTTZ, MVT::v16i8, 13 },
+ { ISD::SADDSAT, MVT::v8i16, 1 },
+ { ISD::SADDSAT, MVT::v16i8, 1 },
+ { ISD::SSUBSAT, MVT::v8i16, 1 },
+ { ISD::SSUBSAT, MVT::v16i8, 1 },
+ { ISD::UADDSAT, MVT::v8i16, 1 },
+ { ISD::UADDSAT, MVT::v16i8, 1 },
+ { ISD::USUBSAT, MVT::v8i16, 1 },
+ { ISD::USUBSAT, MVT::v16i8, 1 },
{ ISD::FSQRT, MVT::f64, 32 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/
};
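These per-feature tables are flat (ISD opcode, MVT, cost) triples, scanned from the most specific subtarget table down to the generic ones. A simplified model of that lookup with assumed types (the real code uses llvm::CostTableLookup over CostTblEntry):

#include <cstddef>

// Illustrative sketch only.
struct CostEntrySketch {
  int ISD;  // operation, e.g. a stand-in for ISD::USUBSAT
  int Type; // legalized vector type, e.g. a stand-in for MVT::v8i16
  int Cost;
};

static const CostEntrySketch *
lookupCostSketch(const CostEntrySketch *Table, size_t Size, int ISD, int Type) {
  for (size_t I = 0; I != Size; ++I)
    if (Table[I].ISD == ISD && Table[I].Type == Type)
      return &Table[I];
  return nullptr; // not found: fall through to a less specific table
}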
@@ -1800,76 +1972,180 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::cttz:
ISD = ISD::CTTZ;
break;
+ case Intrinsic::sadd_sat:
+ ISD = ISD::SADDSAT;
+ break;
+ case Intrinsic::ssub_sat:
+ ISD = ISD::SSUBSAT;
+ break;
+ case Intrinsic::uadd_sat:
+ ISD = ISD::UADDSAT;
+ break;
+ case Intrinsic::usub_sat:
+ ISD = ISD::USUBSAT;
+ break;
case Intrinsic::sqrt:
ISD = ISD::FSQRT;
break;
}
- // Legalize the type.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
- MVT MTy = LT.second;
+ if (ISD != ISD::DELETED_NODE) {
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ MVT MTy = LT.second;
- // Attempt to lookup cost.
- if (ST->isGLM())
- if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ // Attempt to lookup cost.
+ if (ST->isGLM())
+ if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->isSLM())
- if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasCDI())
- if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasCDI())
+ if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX512())
- if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasXOP())
- if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasXOP())
+ if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX2())
- if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSSE3())
- if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSSE3())
+ if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->is64Bit())
- if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF, ScalarizationCostPassed);
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF) {
+ static const CostTblEntry AVX512CostTbl[] = {
+ { ISD::ROTL, MVT::v8i64, 1 },
+ { ISD::ROTL, MVT::v4i64, 1 },
+ { ISD::ROTL, MVT::v2i64, 1 },
+ { ISD::ROTL, MVT::v16i32, 1 },
+ { ISD::ROTL, MVT::v8i32, 1 },
+ { ISD::ROTL, MVT::v4i32, 1 },
+ { ISD::ROTR, MVT::v8i64, 1 },
+ { ISD::ROTR, MVT::v4i64, 1 },
+ { ISD::ROTR, MVT::v2i64, 1 },
+ { ISD::ROTR, MVT::v16i32, 1 },
+ { ISD::ROTR, MVT::v8i32, 1 },
+ { ISD::ROTR, MVT::v4i32, 1 }
+ };
+ // XOP: ROTL = VPROT(X,Y), ROTR = VPROT(X,SUB(0,Y))
+ static const CostTblEntry XOPCostTbl[] = {
+ { ISD::ROTL, MVT::v4i64, 4 },
+ { ISD::ROTL, MVT::v8i32, 4 },
+ { ISD::ROTL, MVT::v16i16, 4 },
+ { ISD::ROTL, MVT::v32i8, 4 },
+ { ISD::ROTL, MVT::v2i64, 1 },
+ { ISD::ROTL, MVT::v4i32, 1 },
+ { ISD::ROTL, MVT::v8i16, 1 },
+ { ISD::ROTL, MVT::v16i8, 1 },
+ { ISD::ROTR, MVT::v4i64, 6 },
+ { ISD::ROTR, MVT::v8i32, 6 },
+ { ISD::ROTR, MVT::v16i16, 6 },
+ { ISD::ROTR, MVT::v32i8, 6 },
+ { ISD::ROTR, MVT::v2i64, 2 },
+ { ISD::ROTR, MVT::v4i32, 2 },
+ { ISD::ROTR, MVT::v8i16, 2 },
+ { ISD::ROTR, MVT::v16i8, 2 }
+ };
+ static const CostTblEntry X64CostTbl[] = { // 64-bit targets
+ { ISD::ROTL, MVT::i64, 1 },
+ { ISD::ROTR, MVT::i64, 1 },
+ { ISD::FSHL, MVT::i64, 4 }
+ };
+ static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
+ { ISD::ROTL, MVT::i32, 1 },
+ { ISD::ROTL, MVT::i16, 1 },
+ { ISD::ROTL, MVT::i8, 1 },
+ { ISD::ROTR, MVT::i32, 1 },
+ { ISD::ROTR, MVT::i16, 1 },
+ { ISD::ROTR, MVT::i8, 1 },
+ { ISD::FSHL, MVT::i32, 4 },
+ { ISD::FSHL, MVT::i16, 4 },
+ { ISD::FSHL, MVT::i8, 4 }
+ };
+
+ unsigned ISD = ISD::DELETED_NODE;
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::fshl:
+ ISD = ISD::FSHL;
+ if (Args[0] == Args[1])
+ ISD = ISD::ROTL;
+ break;
+ case Intrinsic::fshr:
+ // FSHR has the same costs, so don't duplicate.
+ ISD = ISD::FSHL;
+ if (Args[0] == Args[1])
+ ISD = ISD::ROTR;
+ break;
+ }
+
+ if (ISD != ISD::DELETED_NODE) {
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ MVT MTy = LT.second;
+
+ // Attempt to lookup cost.
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasXOP())
+ if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
+
return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF);
}
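
The switch above maps fshl/fshr to ISD::ROTL/ISD::ROTR when the two data operands are the same value, because a funnel shift of x with itself is a rotate. A standalone scalar sketch of that identity (plain C++, not code from this patch):

#include <cassert>
#include <cstdint>

// Funnel shift left on 32 bits: concatenate Hi:Lo, shift left by S mod 32,
// keep the high 32 bits.
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t S) {
  uint32_t R = S & 31;
  return R == 0 ? Hi : (Hi << R) | (Lo >> (32 - R));
}

// Rotate left on 32 bits, written to avoid an undefined shift when R == 0.
static uint32_t rotl32(uint32_t X, uint32_t S) {
  uint32_t R = S & 31;
  return (X << R) | (X >> ((32 - R) & 31));
}

int main() {
  const uint32_t Samples[] = {0u, 1u, 0x80000001u, 0xDEADBEEFu};
  for (uint32_t X : Samples)
    for (uint32_t S = 0; S < 64; ++S)
      assert(fshl32(X, X, S) == rotl32(X, S)); // fshl(x, x, s) is rotl(x, s)
  return 0;
}
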
@@ -2341,11 +2617,15 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return TTI::TCC_Free;
ImmIdx = 1;
break;
- case Instruction::Mul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
+ // Division by constant is typically expanded later into a different
+ // instruction sequence. This completely changes the constants.
+ // Report them as "free" to stop ConstantHoist from marking them as opaque.
+ return TTI::TCC_Free;
+ case Instruction::Mul:
case Instruction::Or:
case Instruction::Xor:
ImmIdx = 1;
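
The udiv/sdiv/urem/srem cases above return TCC_Free because the backend later replaces division by a constant with a multiply-by-reciprocal sequence, so the original immediate never survives to codegen and hoisting it would only hurt. A standalone sketch of that kind of rewrite for an unsigned divide by 10 (the magic constant below is the standard one for this divisor, shown purely for illustration; none of this code is from the patch):

#include <cassert>
#include <cstdint>

static uint32_t div10_ref(uint32_t X) { return X / 10; }

// Multiply-by-reciprocal form: the divisor 10 is gone, replaced by an
// unrelated multiplier and shift, which is why reporting the original
// immediate as free keeps constant hoisting out of the way.
static uint32_t div10_magic(uint32_t X) {
  return static_cast<uint32_t>((static_cast<uint64_t>(X) * 0xCCCCCCCDull) >> 35);
}

int main() {
  const uint32_t Samples[] = {0u, 9u, 10u, 11u, 12345u, 0xFFFFFFFFu};
  for (uint32_t X : Samples)
    assert(div10_ref(X) == div10_magic(X));
  return 0;
}
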
@@ -2690,6 +2970,9 @@ X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
Options.LoadSizes.push_back(4);
Options.LoadSizes.push_back(2);
Options.LoadSizes.push_back(1);
+ // All GPR and vector loads can be unaligned. SIMD compare requires integer
+ // vectors (SSE2/AVX2).
+ Options.AllowOverlappingLoads = true;
return Options;
}();
return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
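
AllowOverlappingLoads lets the memcmp expansion cover an odd-sized tail with a load that overlaps the previous chunk instead of degrading to byte loads. A standalone sketch of the idea for a 7-byte equality compare (illustrative helper names, not the code MemCmpExpansion emits):

#include <cassert>
#include <cstdint>
#include <cstring>

// Unaligned 4-byte load; memcpy keeps this well-defined in C++.
static uint32_t load32(const unsigned char *P) {
  uint32_t V;
  std::memcpy(&V, P, sizeof(V));
  return V;
}

// Equality test over 7 bytes using two 4-byte loads of ranges [0,4) and [3,7).
// The one-byte overlap is harmless for equality and avoids a scalar tail.
static bool equal7(const unsigned char *A, const unsigned char *B) {
  return load32(A) == load32(B) && load32(A + 3) == load32(B + 3);
}

int main() {
  unsigned char A[7] = {1, 2, 3, 4, 5, 6, 7};
  unsigned char B[7] = {1, 2, 3, 4, 5, 6, 7};
  assert(equal7(A, B));
  B[6] = 9;
  assert(!equal7(A, B));
  return 0;
}
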
@@ -2718,7 +3001,14 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
// We currently support only fully-interleaved groups, with no gaps.
// TODO: Support also strided loads (interleaved-groups with gaps).
@@ -2827,7 +3117,14 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
// VecTy for interleave memop is <VF*Factor x Elt>.
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have
@@ -2945,7 +3242,9 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
Type *EltTy = VecTy->getVectorElementType();
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
@@ -2957,11 +3256,14 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
};
if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
if (ST->hasAVX2())
return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
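
The new UseMaskForCond/UseMaskForGaps parameters make the X86-specific interleaving costs defer to the generic model whenever the group needs masking. A "gap" here is an interleaved group in which some members of the stride are never accessed, as in this hypothetical example (illustrative types and names, not from the patch):

#include <cstddef>

struct Pixel { int R, G, B; };  // interleave factor 3

// Only R and B are read, so a vectorized version of this loop forms an
// interleaved group with a gap at member G. When the vectorizer has to mask
// such a group (UseMaskForGaps), the cost functions above fall back to
// BaseT::getInterleavedMemoryOpCost rather than claiming the cheap path.
int sumRB(const Pixel *P, size_t N) {
  int Sum = 0;
  for (size_t I = 0; I != N; ++I)
    Sum += P[I].R + P[I].B;
  return Sum;
}
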
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 3df899038820..1637592c81f8 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -101,13 +101,19 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
int getIntImmCost(int64_t);
diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
index dde9c734f492..185deda97c1f 100644
--- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -34,10 +34,6 @@ using namespace llvm;
#define DEBUG_TYPE "winehstate"
-namespace llvm {
-void initializeWinEHStatePassPass(PassRegistry &);
-}
-
namespace {
const int OverdefinedState = INT_MIN;
@@ -369,7 +365,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
// Insert an unlink before all returns.
for (BasicBlock &BB : *F) {
- TerminatorInst *T = BB.getTerminator();
+ Instruction *T = BB.getTerminator();
if (!isa<ReturnInst>(T))
continue;
Builder.SetInsertPoint(T);
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index b87c149a36dc..fff8a66d0e75 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -151,7 +151,7 @@ static void GetSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
Offset,
FramePtr));
}
- llvm::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset);
+ llvm::sort(SpillList, CompareSSIOffset);
}
/// Creates an ordered list of EH info register 'spills'.
@@ -170,7 +170,7 @@ static void GetEHSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
SpillList.push_back(
StackSlotInfo(EHSlot[0], MFI.getObjectOffset(EHSlot[1]),
TL->getExceptionSelectorRegister(PersonalityFn)));
- llvm::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset);
+ llvm::sort(SpillList, CompareSSIOffset);
}
static MachineMemOperand *getFrameIndexMMO(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 1a56d1fd6e2f..1688c38efc1d 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -150,11 +150,10 @@ void XCoreDAGToDAGISel::Select(SDNode *N) {
SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
MVT::Other, CPIdx,
CurDAG->getEntryNode());
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] =
+ MachineMemOperand *MemOp =
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
MachineMemOperand::MOLoad, 4, 4);
- cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(node), {MemOp});
ReplaceNode(N, node);
return;
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 99e76144cba3..75d7ae7048a1 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -403,8 +403,7 @@ SDValue XCoreTargetLowering::lowerLoadWordFromAlignedBasePlusOffset(
static bool isWordAligned(SDValue Value, SelectionDAG &DAG)
{
- KnownBits Known;
- DAG.computeKnownBits(Value, Known);
+ KnownBits Known = DAG.computeKnownBits(Value);
return Known.countMinTrailingZeros() >= 2;
}
@@ -1649,10 +1648,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
// low bit set
if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
- KnownBits Known;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.computeKnownBits(N2, Known);
+ KnownBits Known = DAG.computeKnownBits(N2);
if ((Known.Zero & Mask) == Mask) {
SDValue Carry = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
@@ -1672,10 +1670,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
- KnownBits Known;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.computeKnownBits(N2, Known);
+ KnownBits Known = DAG.computeKnownBits(N2);
if ((Known.Zero & Mask) == Mask) {
SDValue Borrow = N2;
SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
@@ -1688,10 +1685,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
// low bit set
if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
- KnownBits Known;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.computeKnownBits(N2, Known);
+ KnownBits Known = DAG.computeKnownBits(N2);
if ((Known.Zero & Mask) == Mask) {
SDValue Borrow = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
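
These ladd/lsub folds fire only when KnownBits proves that every bit of the operand except bit 0 is zero, i.e. the value can only be 0 or 1 and can therefore act directly as the carry/borrow input of an ordinary add/sub. A plain-integer rendering of that test (the real code works on llvm::APInt and llvm::KnownBits; this is an illustration only):

#include <cassert>
#include <cstdint>

// KnownZero has a bit set for every bit proven to be zero. If all bits above
// bit 0 are proven zero, the value is at most 1.
static bool onlyLowBitPossible(uint32_t KnownZero) {
  const uint32_t HighMask = ~1u;  // analogue of APInt::getHighBitsSet(32, 31)
  return (KnownZero & HighMask) == HighMask;
}

int main() {
  assert(onlyLowBitPossible(~1u)); // everything above bit 0 known zero
  assert(onlyLowBitPossible(~0u)); // value proven to be exactly 0
  assert(!onlyLowBitPossible(0u)); // nothing known about the value
  return 0;
}
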
diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 1c93ba8fa14c..7455cd997ad6 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -129,7 +129,7 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
do {
SmallVector<WeakTrackingVH, 8> WUsers(CE->user_begin(), CE->user_end());
- llvm::sort(WUsers.begin(), WUsers.end());
+ llvm::sort(WUsers);
WUsers.erase(std::unique(WUsers.begin(), WUsers.end()), WUsers.end());
while (!WUsers.empty())
if (WeakTrackingVH WU = WUsers.pop_back_val()) {
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index 9451a05d8d58..2e9fd98ed34f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -33,8 +33,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 38925bfd51b0..2aa9932e2465 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -31,7 +31,8 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
+static CodeModel::Model
+getEffectiveXCoreCodeModel(Optional<CodeModel::Model> CM) {
if (CM) {
if (*CM != CodeModel::Small && *CM != CodeModel::Large)
report_fatal_error("Target only supports CodeModel Small or Large");
@@ -51,7 +52,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveXCoreCodeModel(CM), OL),
TLOF(llvm::make_unique<XCoreTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();